AuroraRuntime/Source/Locale/Encoding/ConvertInternal.cpp
2021-09-06 14:08:37 +01:00

272 lines
9.3 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: ConvertInternal.cpp
Date: 2021-8-19
Author: Reece
***/
#include <RuntimeInternal.hpp>
#include "../Locale.hpp"
#include "Encoding.hpp"
#include "ConvertInternal.hpp"
#if !defined(AU_NO_CPPLOCALE)
#include <locale>
#include <codecvt>
#endif
namespace Aurora::Locale::Encoding
{
#if !defined(AU_NO_CPPLOCALE)
static std::wstring_convert<std::codecvt_utf8<wchar_t>> gUtf8Conv;

/**
 * Thin publicly constructible/destructible wrapper around the standard
 * std::codecvt<char_t, char, std::mbstate_t> facet, so that a converter can be
 * created as a plain local object.
 *
 * @tparam char_t wide code unit type handled by the facet (char16_t or char32_t below)
 */
template<typename char_t>
struct UtfCodecvtFacet_t : public std::codecvt<char_t, char, std::mbstate_t>
{
    // `refs` is forwarded untouched to the std::codecvt constructor
    UtfCodecvtFacet_t(std::size_t refs = 0) : std::codecvt<char_t, char, std::mbstate_t>(refs)
    {}
};

// char16_t <-> char converter
using Utf16Converter_t = UtfCodecvtFacet_t<char16_t>;
// char32_t <-> char converter
using Utf32Converter_t = UtfCodecvtFacet_t<char32_t>;
#else
// No C++ locale support in this build: the converter names still exist, but only as placeholders
using Utf16Converter_t = void;
using Utf32Converter_t = void;
#endif
/**
 * Byte-swaps, in place, every code unit of a buffer whose byte order differs
 * from the byte order of the CPU this code was built for.
 *
 * @tparam charout_t    code unit type; only sizeof(charout_t) is used
 * @param out           buffer to modify in place; a null pointer is ignored
 * @param count         length of `out` in BYTES (not in code units). A trailing
 *                      partial code unit (count % sizeof(charout_t) bytes) is left untouched
 * @param isEncodingLE  true when the data in `out` is (or must become) little-endian
 *
 * Nothing happens when the encoding's endianness already equals the host's.
 */
template<typename charout_t>
void FlipEndianness(AuUInt8 *out, AuUInt32 count, bool isEncodingLE)
{
    constexpr AuUInt32 kUnitSize = sizeof(charout_t);

    const bool isHostLE = Aurora::Build::kCurrentEndian == Aurora::Build::ECPUEndian::eCPULittle;
    if (isEncodingLE == isHostLE)
    {
        return;
    }

    if (!out)
    {
        return;
    }

    // Only whole code units are swapped; stepping over a partial trailing unit
    // would touch bytes beyond `count`
    const AuUInt32 usable = count - (count % kUnitSize);

    for (AuUInt32 offset = 0; offset < usable; offset += kUnitSize)
    {
        // Reverse the unit byte by byte. This does not depend on the alignment of
        // `out` and does not access the byte buffer through a wider integer type.
        AuUInt8 *unit = out + offset;
        for (AuUInt32 lo = 0, hi = kUnitSize - 1; lo < hi; ++lo, --hi)
        {
            const AuUInt8 carry = unit[lo];
            unit[lo] = unit[hi];
            unit[hi] = carry;
        }
    }
}
// Declarations (without bodies) of explicit specializations of FlipEndianness for
// AuUInt32 and AuUInt16. No definition of either specialization appears in the visible
// part of this file, and the calls visible here instantiate the template with
// char16_t / char32_t rather than with these types.
// NOTE(review): an explicit instantiation (`template void FlipEndianness<...>(...);`)
// may have been intended here - confirm before calling FlipEndianness<AuUInt32/AuUInt16>.
template<>
void FlipEndianness<AuUInt32>(AuUInt8 *out, AuUInt32 count, bool isEncodingLE);
template<>
void FlipEndianness<AuUInt16>(AuUInt8 *out, AuUInt32 count, bool isEncodingLE);
/**
 * Converts a buffer of charin_t code units into `char` output using converter_t::out
 * (for the converters declared in this file that is std::codecvt<charin_t, char, ...>::out).
 *
 * @tparam converter_t  default-constructible facet type providing out()
 * @tparam charin_t     code unit type stored in `in`
 * @param in            input bytes. MODIFIED IN PLACE: the buffer is byte-swapped to host
 *                      order first when `endianLe` does not match the host
 * @param length        input length in bytes; a trailing partial code unit is ignored
 * @param out           destination buffer
 * @param outLen        capacity of `out` in bytes
 * @param endianLe      true when `in` holds little-endian code units
 * @return              {input bytes consumed, output bytes written}; both are zero when
 *                      AU_NO_CPPLOCALE is defined
 *
 * The status returned by out() is not inspected; whatever was converted before the
 * converter stopped is what gets reported.
 */
template<typename converter_t, typename charin_t>
static AuStreamReadWrittenPair_t TranslateOutUtfBuffer(AuUInt8 *in, AuUInt length, void *out, AuUInt outLen, bool endianLe = true)
{
#if defined(AU_NO_CPPLOCALE)
    return {};
#else
    // Drop a dangling partial code unit so that the end pointer lands on a unit boundary
    length -= length % sizeof(charin_t);

    auto source = reinterpret_cast<const charin_t *>(in);
    auto sourceEnd = source + (length / sizeof(charin_t));
    auto dest = reinterpret_cast<char *>(out);

    std::mbstate_t mb {};
    converter_t converter;
    const charin_t *fromNext = source;
    char *toNext = dest;

    // The converter works on host-order code units
    FlipEndianness<charin_t>(in, length, endianLe);

    converter.out(mb,
                  source,
                  sourceEnd,
                  fromNext,
                  dest,
                  dest + outLen,
                  toNext);

    // Report both counts in bytes, the unit the caller used for `length` and `outLen`
    const AuUInt readBytes = static_cast<AuUInt>(fromNext - source) * sizeof(charin_t);
    const AuUInt writtenBytes = static_cast<AuUInt>(toNext - dest);
    return {readBytes, writtenBytes};
#endif
}
/**
 * Converts `char` input into charout_t code units using converter_t::in
 * (for the converters declared in this file that is std::codecvt<charout_t, char, ...>::in),
 * then byte-swaps the produced code units when `endianLe` does not match the host.
 *
 * @tparam converter_t  default-constructible facet type providing in()
 * @tparam charout_t    code unit type written to `out`
 * @param in            input bytes
 * @param length        input length in bytes
 * @param out           destination buffer
 * @param outLen        capacity of `out` in bytes; only whole code units are used
 * @param endianLe      true when the output must be little-endian
 * @return              {input bytes consumed, output bytes written}; both are zero when
 *                      AU_NO_CPPLOCALE is defined
 *
 * The status returned by in() is not inspected; whatever was converted before the
 * converter stopped is what gets reported.
 */
template<typename converter_t, typename charout_t>
static AuStreamReadWrittenPair_t TranslateInUtfBuffer(const AuUInt8 *in, AuUInt length, AuUInt8 *out, AuUInt outLen, bool endianLe = true)
{
#if defined(AU_NO_CPPLOCALE)
    return {};
#else
    auto source = reinterpret_cast<const char *>(in);
    auto dest = reinterpret_cast<charout_t *>(out);
    // Capacity in whole code units, so the end pointer never sits inside a unit
    auto destEnd = dest + (outLen / sizeof(charout_t));

    std::mbstate_t mb {};
    converter_t converter;
    const char *fromNext = source;
    charout_t *toNext = dest;

    converter.in(mb,
                 source,
                 source + length,
                 fromNext,
                 dest,
                 destEnd,
                 toNext);

    const AuUInt readBytes = static_cast<AuUInt>(fromNext - source);
    const AuUInt writtenBytes = static_cast<AuUInt>(toNext - dest) * sizeof(charout_t);

    // FlipEndianness expects a length in bytes, not a number of code units
    FlipEndianness<charout_t>(out, writtenBytes, endianLe);

    return {readBytes, writtenBytes};
#endif
}
/**
 * Container flavour of TranslateInUtfBuffer: converts `char` input into charout_t code
 * units stored in `out`, which is resized to exactly the number of bytes produced.
 *
 * @param in        input bytes
 * @param length    input length in bytes
 * @param out       receives the converted bytes; left empty when nothing was produced
 * @param endianLe  true when the output must be little-endian
 * @return          true when at least one byte was written
 */
template<typename converter_t, typename charout_t>
static bool TranslateInUtfBuffer(const AuUInt8 *in, AuUInt length, AuList<AuUInt8> &out, bool endianLe = true)
{
    // One code unit per input byte is what the buffer is sized for
    out.resize(length * sizeof(charout_t));

    const auto readWritten = TranslateInUtfBuffer<converter_t, charout_t>(in, length, out.data(), out.size(), endianLe);

    // The raw-buffer overload returns a {read, written} pair; the written byte count is
    // the second member (AuStreamReadWrittenPair_t is built with AuMakePair in this file)
    const auto written = readWritten.second;

    out.resize(written);
    if (written)
    {
        out.shrink_to_fit();
    }
    return written != 0;
}
/**
 * Encodes UTF-8 text into the requested code page.
 *
 * @param utf8          UTF-8 input; a null pointer yields an empty result
 * @param utf8Length    input length in bytes
 * @param binary        destination buffer, or null to run the conversion into a
 *                      temporary buffer that is discarded (only the counts are returned)
 * @param binaryLength  capacity of `binary` in bytes; ignored when `binary` is null
 * @param page          target code page; only the UTF-8/16/32 pages are handled here
 * @return              {input bytes consumed, output bytes produced}; empty for any other page
 */
AuStreamReadWrittenPair_t EncodeUTF8Internal(const void *utf8, AuUInt32 utf8Length, void *binary, AuUInt32 binaryLength, ECodePage page)
{
    AuStreamReadWrittenPair_t ret {};

    if (!utf8)
    {
        // There is nothing to read from, so nothing can have been read or written
        return ret;
    }

    auto input = reinterpret_cast<const AuUInt8 *>(utf8);
    auto output = reinterpret_cast<AuUInt8 *>(binary);
    AuUInt outputLength = binaryLength;
    AuList<AuUInt8> scratch;

    // Without a caller buffer, convert into a scratch buffer sized for the worst case:
    // every UTF-8 byte produces at most one code unit of the target encoding
    const auto ensureOutput = [&](AuUInt bytesPerCodeUnit)
    {
        if (!output)
        {
            scratch.resize(AuUInt(utf8Length) * bytesPerCodeUnit);
            output = scratch.data();
            outputLength = scratch.size();
        }
    };

    switch (page)
    {
    default:
    case ECodePage::eUnsupported:
        return {};
    case ECodePage::eUTF16:
    case ECodePage::eUTF16BE:
        ensureOutput(sizeof(char16_t));
        ret = TranslateInUtfBuffer<Utf16Converter_t, char16_t>(input, utf8Length, output, outputLength, page == ECodePage::eUTF16);
        break;
    case ECodePage::eUTF32:
    case ECodePage::eUTF32BE:
        ensureOutput(sizeof(char32_t));
        ret = TranslateInUtfBuffer<Utf32Converter_t, char32_t>(input, utf8Length, output, outputLength, page == ECodePage::eUTF32);
        break;
    case ECodePage::eUTF8:
    {
        // Pass-through. With a caller buffer only what fits is copied, and exactly that
        // amount is reported; without one the full input length is reported.
        // NOTE(review): a truncated copy may end in the middle of a multi-byte sequence.
        const AuUInt32 copied = output ? std::min(utf8Length, binaryLength) : utf8Length;
        if (output && copied)
        {
            std::memcpy(output, input, copied);
        }
        ret = AuMakePair(copied, copied);
        break;
    }
    }
    return ret;
}
/**
 * Decodes text stored in the given code page into UTF-8.
 *
 * @param binary        encoded input (never modified); a null pointer yields an empty result
 * @param binaryLength  input length in bytes
 * @param utf8          destination buffer, or null to run the conversion into a temporary
 *                      buffer that is discarded (only the counts are returned)
 * @param utf8Max       capacity of `utf8` in bytes; ignored when `utf8` is null
 * @param page          source code page; only the UTF-8/16/32 pages are handled here
 * @return              {input bytes consumed, UTF-8 bytes produced}; empty for any other page
 */
AuStreamReadWrittenPair_t DecodeUTF8Internal(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utf8Max, ECodePage page)
{
    AuStreamReadWrittenPair_t ret {};

    if (!binary)
    {
        // There is nothing to read from, so nothing can have been read or written
        return ret;
    }

    auto input = reinterpret_cast<const AuUInt8 *>(binary);

    const bool isUtf16 = (page == ECodePage::eUTF16) || (page == ECodePage::eUTF16BE);
    const bool isUtf32 = (page == ECodePage::eUTF32) || (page == ECodePage::eUTF32BE);

    if (isUtf16 || isUtf32)
    {
        void *output = utf8;
        AuUInt outputLength = utf8Max;
        AuList<AuUInt8> scratch;
        if (!output)
        {
            // Four output bytes per input byte, computed in AuUInt so that the
            // multiplication is not done in 32 bits
            scratch.resize(AuUInt(binaryLength) * 4);
            output = scratch.data();
            outputLength = scratch.size();
        }

        // TranslateOutUtfBuffer may byte-swap its input in place, so it is handed a
        // private copy; the copy is only made on the paths that need it
        AuList<AuUInt8> rw(input, input + binaryLength);

        if (isUtf16)
        {
            ret = TranslateOutUtfBuffer<Utf16Converter_t, char16_t>(rw.data(), rw.size(), output, outputLength, page == ECodePage::eUTF16);
        }
        else
        {
            ret = TranslateOutUtfBuffer<Utf32Converter_t, char32_t>(rw.data(), rw.size(), output, outputLength, page == ECodePage::eUTF32);
        }
        return ret;
    }

    if (page == ECodePage::eUTF8)
    {
        // Pass-through. With a caller buffer only what fits is copied, and exactly that
        // amount is reported; without one the full input length is reported.
        // NOTE(review): a truncated copy may end in the middle of a multi-byte sequence.
        const AuUInt32 copied = utf8 ? std::min(binaryLength, utf8Max) : binaryLength;
        if (utf8 && copied)
        {
            std::memcpy(utf8, input, copied);
        }
        ret = AuMakePair(copied, copied);
        return ret;
    }

    // ECodePage::eUnsupported and every page not handled above
    return {};
}
/**
 * Mutable-input overload of DecodeUTF8Internal.
 *
 * The body used to be a line-for-line copy of the const overload, so it now simply
 * forwards to it; `binary` is not modified.
 *
 * @param binary        encoded input
 * @param binaryLength  input length in bytes
 * @param utf8          destination buffer (may be null)
 * @param utf8Max       capacity of `utf8` in bytes
 * @param page          source code page
 * @return              whatever the const overload returns for the same arguments
 */
AuStreamReadWrittenPair_t DecodeUTF8Internal(void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utf8Max, ECodePage page)
{
    // The explicit cast selects the `const void *` overload rather than recursing
    return DecodeUTF8Internal(static_cast<const void *>(binary), binaryLength, utf8, utf8Max, page);
}
/**
 * Code page -> UTF-8 entry point for a mutable input buffer.
 * Reorders the arguments and forwards to the `void *` overload of DecodeUTF8Internal.
 *
 * @param page     code page of the data in `in`
 * @param in       encoded input
 * @param length   length of `in`
 * @param utf8     destination buffer
 * @param utf8Max  capacity of `utf8`
 * @return         the pair produced by DecodeUTF8Internal
 */
AuStreamReadWrittenPair_t STLCPToUTF8(ECodePage page, void *in, AuUInt32 length, void *utf8, AuUInt32 utf8Max)
{
    auto readWritten = DecodeUTF8Internal(in, length, utf8, utf8Max, page);
    return readWritten;
}
/**
 * Code page -> UTF-8 entry point for a read-only input buffer.
 * Reorders the arguments and forwards to the `const void *` overload of DecodeUTF8Internal.
 *
 * @param page     code page of the data in `in`
 * @param in       encoded input
 * @param length   length of `in`
 * @param utf8     destination buffer
 * @param utf8Max  capacity of `utf8`
 * @return         the pair produced by DecodeUTF8Internal
 */
AuStreamReadWrittenPair_t STLCPToUTF8(ECodePage page, const void *in, AuUInt32 length, void *utf8, AuUInt32 utf8Max)
{
    auto readWritten = DecodeUTF8Internal(in, length, utf8, utf8Max, page);
    return readWritten;
}
/**
 * UTF-8 -> code page entry point.
 * Reorders the arguments and forwards to EncodeUTF8Internal.
 *
 * @param page        target code page
 * @param utf8        UTF-8 input
 * @param utf8Length  length of `utf8`
 * @param cp          destination buffer
 * @param cpLen       capacity of `cp`
 * @return            the pair produced by EncodeUTF8Internal
 */
AuStreamReadWrittenPair_t STLUTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen)
{
    auto readWritten = EncodeUTF8Internal(utf8, utf8Length, cp, cpLen, page);
    return readWritten;
}
}