239 lines
8.5 KiB
C++
239 lines
8.5 KiB
C++
/***
|
|
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
File: ConvertInternal.cpp
|
|
Date: 2021-8-19
|
|
Author: Reece
|
|
***/
|
|
#include <RuntimeInternal.hpp>
|
|
#include "../Locale.hpp"
|
|
#include "Encoding.hpp"
|
|
#include "ConvertInternal.hpp"
|
|
|
|
#if !defined(AU_NO_CPPLOCALE)
|
|
#include <locale>
|
|
#include <codecvt>
|
|
#endif
|
|
|
|
namespace Aurora::Locale::Encoding
|
|
{
|
|
#if !defined(AU_NO_CPPLOCALE)
|
|
// File-local std::wstring_convert instance parameterised on std::codecvt_utf8<wchar_t>.
// NOTE(review): nothing else in this file references gUtf8Conv - confirm whether it is still needed.
static std::wstring_convert<std::codecvt_utf8<wchar_t>> gUtf8Conv;
|
|
|
|
/**
 * UTF-16 <-> UTF-8 conversion facet that can be created as an ordinary object.
 *
 * std::codecvt keeps its destructor protected; deriving from it gives a type
 * with an accessible destructor, so instances may live on the stack.
 *
 * @param refs forwarded unchanged to the std::codecvt constructor
 */
struct Utf16Converter_t : public std::codecvt<char16_t, char, std::mbstate_t>
{
    Utf16Converter_t(std::size_t refs = 0) :
        std::codecvt<char16_t, char, std::mbstate_t>(refs)
    {
    }
};
|
|
/**
 * UTF-32 <-> UTF-8 conversion facet that can be created as an ordinary object.
 *
 * std::codecvt keeps its destructor protected; deriving from it gives a type
 * with an accessible destructor, so instances may live on the stack.
 *
 * @param refs forwarded unchanged to the std::codecvt constructor
 */
struct Utf32Converter_t : public std::codecvt<char32_t, char, std::mbstate_t>
{
    Utf32Converter_t(std::size_t refs = 0) :
        std::codecvt<char32_t, char, std::mbstate_t>(refs)
    {
    }
};
|
|
#else
|
|
// AU_NO_CPPLOCALE build: no codecvt facet exists, so the name is aliased to void purely so that
// template argument lists mentioning Utf16Converter_t still compile (the converter is never created).
using Utf16Converter_t = void;
|
|
// AU_NO_CPPLOCALE build: no codecvt facet exists, so the name is aliased to void purely so that
// template argument lists mentioning Utf32Converter_t still compile (the converter is never created).
using Utf32Converter_t = void;
|
|
#endif
|
|
|
|
template<typename charout_t>
|
|
void FlipEndianness(AuUInt8 *out, AuUInt32 count, bool isEncodingLE)
|
|
{
|
|
if (isEncodingLE == (Aurora::Build::kCurrentEndian == Aurora::Build::ECPUEndian::eCPULittle))
|
|
{
|
|
return;
|
|
}
|
|
|
|
for (AuUInt i = 0; i < count; i += sizeof(charout_t))
|
|
{
|
|
// TODO: Read/Write[LE/BE] macros
|
|
// Someone will moan about sizeof(T) != ABI guaranteed reads despite being legal under aarch+x86+x86_64
|
|
// So long as buffer.data !% alignof(T) (usually = sizeof(T)), we should be good
|
|
|
|
if constexpr (sizeof(charout_t) == 2)
|
|
{
|
|
auto word = reinterpret_cast<AuUInt16 *>(&out[i]);
|
|
AuUInt16 altEndian = *word;
|
|
AuUInt16 swapped = (altEndian & 0xFF) << 8 | (altEndian >> 8) & 0xFF;
|
|
*word = swapped;
|
|
}
|
|
else if constexpr (sizeof(charout_t) == 4)
|
|
{
|
|
auto word = reinterpret_cast<AuUInt32 *>(&out[i]);
|
|
AuUInt32 altEndian = *word;
|
|
AuUInt32 swapped = (altEndian & 0xFF) << 24 |
|
|
((altEndian >> 8) & 0xFF) << 16 |
|
|
((altEndian >> 16) & 0xFF) << 8 |
|
|
((altEndian >> 24) & 0xFF);
|
|
*word = swapped;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<>
|
|
void FlipEndianness<AuUInt32>(AuUInt8 *out, AuUInt32 count, bool isEncodingLE);
|
|
|
|
template<>
|
|
void FlipEndianness<AuUInt16>(AuUInt8 *out, AuUInt32 count, bool isEncodingLE);
|
|
|
|
template<typename converter_t, typename charin_t>
|
|
static AuStreamReadWrittenPair_t TranslateOutUtfBuffer(AuUInt8 *in, AuUInt length, void *out, AuUInt outLen, bool endianLe = true)
|
|
{
|
|
#if defined(AU_NO_CPPLOCALE)
|
|
return {};
|
|
#else
|
|
std::mbstate_t mb {};
|
|
converter_t converter;
|
|
const charin_t *fromNext;
|
|
char *toNext;
|
|
|
|
|
|
FlipEndianness<charin_t>(in, length, endianLe);
|
|
|
|
converter.out(mb,
|
|
reinterpret_cast<const charin_t *>(in),
|
|
reinterpret_cast<const charin_t *>(in + length),
|
|
fromNext,
|
|
reinterpret_cast<char *>(out),
|
|
reinterpret_cast<char *>(out) + outLen,
|
|
toNext);
|
|
|
|
return {fromNext - reinterpret_cast<const charin_t *>(in), toNext - out};
|
|
#endif
|
|
}
|
|
|
|
template<typename converter_t, typename charout_t>
|
|
static AuStreamReadWrittenPair_t TranslateInUtfBuffer(const AuUInt8 *in, AuUInt length, AuUInt8 *out, AuUInt outLen, bool endianLe = true)
|
|
{
|
|
#if defined(AU_NO_CPPLOCALE)
|
|
return {};
|
|
#else
|
|
std::mbstate_t mb {};
|
|
converter_t converter;
|
|
const char *fromNext;
|
|
charout_t *toNext;
|
|
|
|
converter.in(mb,
|
|
reinterpret_cast<const char *>(in),
|
|
reinterpret_cast<const char *>(in) + length,
|
|
fromNext,
|
|
reinterpret_cast<charout_t *>(out),
|
|
reinterpret_cast<charout_t *>(out + outLen),
|
|
toNext);
|
|
|
|
auto done = toNext - reinterpret_cast<charout_t *>(out);
|
|
FlipEndianness<charout_t>(out, done, endianLe);
|
|
return {(const AuUInt8 *)fromNext - in, done * sizeof(charout_t)};
|
|
#endif
|
|
}
|
|
|
|
template<typename converter_t, typename charout_t>
|
|
static bool TranslateInUtfBuffer(const AuUInt8 *in, AuUInt length, AuList<AuUInt8> &out, bool endianLe = true)
|
|
{
|
|
out.resize(length * sizeof(charout_t));
|
|
auto len = TranslateInUtfBuffer<converter_t, charout_t>(in, length, out.data(), out.size(), endianLe);
|
|
if (len)
|
|
{
|
|
out.resize(len);
|
|
out.shrink_to_fit();
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/**
 * Encodes UTF-8 input into the requested code page.
 *
 * eUTF16 / eUTF16BE and eUTF32 / eUTF32BE go through TranslateInUtfBuffer
 * (the non-BE page selects little endian output). eUTF8 is a plain copy of
 * min(ut8Length, binaryLength) bytes. Every other page yields an empty pair.
 *
 * @param utf8          UTF-8 input
 * @param ut8Length     size of the input in bytes
 * @param binary        destination buffer
 * @param binaryLength  capacity of the destination in bytes
 * @param page          target code page
 * @return the {read, written} pair of the conversion; empty for unsupported pages
 */
AuStreamReadWrittenPair_t EncodeUTF8Internal(const void *utf8, AuUInt32 ut8Length, void *binary, AuUInt32 binaryLength, ECodePage page)
{
    AuStreamReadWrittenPair_t result {};

    const auto source = reinterpret_cast<const AuUInt8 *>(utf8);
    const auto target = reinterpret_cast<AuUInt8 *>(binary);

    if ((page == ECodePage::eUTF16) || (page == ECodePage::eUTF16BE))
    {
        result = TranslateInUtfBuffer<Utf16Converter_t, char16_t>(source, ut8Length, target, binaryLength, page == ECodePage::eUTF16);
        return result;
    }

    if ((page == ECodePage::eUTF32) || (page == ECodePage::eUTF32BE))
    {
        result = TranslateInUtfBuffer<Utf32Converter_t, char32_t>(source, ut8Length, target, binaryLength, page == ECodePage::eUTF32);
        return result;
    }

    if (page == ECodePage::eUTF8)
    {
        // Same encoding on both sides: copy as much as fits
        const auto copyable = std::min(AuUInt(ut8Length), AuUInt(binaryLength));
        std::memcpy(binary, utf8, copyable);
        result = AuMakePair(copyable, copyable);
        return result;
    }

    // eUnsupported and anything not handled above
    return {};
}
|
|
|
|
/**
 * Decodes data in the given code page into UTF-8.
 *
 * eUTF16 / eUTF16BE and eUTF32 / eUTF32BE go through TranslateOutUtfBuffer
 * (the non-BE page means little endian input). Because that helper takes a
 * mutable input pointer, the read-only input is copied into a scratch list
 * first; the copy is made only for these pages. eUTF8 is a plain copy of
 * min(binaryLength, utf8Max) bytes. Every other page yields an empty pair.
 *
 * @param binary        encoded input (never modified)
 * @param binaryLength  size of the input in bytes
 * @param utf8          destination buffer for UTF-8
 * @param utf8Max       capacity of the destination in bytes
 * @param page          code page of the input
 * @return the {read, written} pair produced by TranslateOutUtfBuffer, or
 *         {copied, copied} for eUTF8; empty for unsupported pages
 */
AuStreamReadWrittenPair_t DecodeUTF8Internal(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utf8Max, ECodePage page)
{
    AuStreamReadWrittenPair_t ret {};
    const auto bytes = reinterpret_cast<const AuUInt8 *>(binary);

    switch (page)
    {
    case ECodePage::eUTF16:
    case ECodePage::eUTF16BE:
    {
        // Scratch copy: the translator takes a non-const input pointer
        AuList<AuUInt8> rw(bytes, bytes + binaryLength);
        ret = TranslateOutUtfBuffer<Utf16Converter_t, char16_t>(rw.data(), rw.size(), utf8, utf8Max, page == ECodePage::eUTF16);
        break;
    }
    case ECodePage::eUTF32:
    case ECodePage::eUTF32BE:
    {
        // Scratch copy: the translator takes a non-const input pointer
        AuList<AuUInt8> rw(bytes, bytes + binaryLength);
        ret = TranslateOutUtfBuffer<Utf32Converter_t, char32_t>(rw.data(), rw.size(), utf8, utf8Max, page == ECodePage::eUTF32);
        break;
    }
    case ECodePage::eUTF8:
    {
        const auto readable = std::min(AuUInt(binaryLength), AuUInt(utf8Max));
        if (readable)
        {
            // memcpy must not be handed null pointers, even for zero bytes
            std::memcpy(utf8, binary, readable);
        }
        ret = AuMakePair(readable, readable);
        break;
    }
    case ECodePage::eUnsupported:
    default:
        return {};
    }

    return ret;
}
|
|
|
|
/**
 * Decodes data in the given code page into UTF-8 (mutable-input overload).
 *
 * Behaves exactly like the overload taking `const void *binary`: the input
 * buffer is not modified. It forwards to that overload instead of repeating
 * its body.
 *
 * @param binary        encoded input
 * @param binaryLength  size of the input in bytes
 * @param utf8          destination buffer for UTF-8
 * @param utf8Max       capacity of the destination in bytes
 * @param page          code page of the input
 * @return whatever the const-input overload returns for the same arguments
 */
AuStreamReadWrittenPair_t DecodeUTF8Internal(void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utf8Max, ECodePage page)
{
    // The cast is what selects the const overload; without it this call would recurse
    return DecodeUTF8Internal(static_cast<const void *>(binary), binaryLength, utf8, utf8Max, page);
}
|
|
|
|
/**
 * Code page -> UTF-8 entry point for a mutable input buffer.
 * Reorders the arguments and forwards to the DecodeUTF8Internal overload that
 * takes a `void *` input.
 *
 * @param page     code page of the input
 * @param in       encoded input
 * @param length   size of the input in bytes
 * @param utf8     destination buffer for UTF-8
 * @param utf8Max  capacity of the destination in bytes
 * @return the pair returned by DecodeUTF8Internal
 */
AuStreamReadWrittenPair_t STLCPToUTF8(ECodePage page, void *in, AuUInt32 length, void *utf8, AuUInt32 utf8Max)
{
    AuStreamReadWrittenPair_t decoded = DecodeUTF8Internal(in, length, utf8, utf8Max, page);
    return decoded;
}
|
|
|
|
/**
 * Code page -> UTF-8 entry point for a read-only input buffer.
 * Reorders the arguments and forwards to the DecodeUTF8Internal overload that
 * takes a `const void *` input.
 *
 * @param page     code page of the input
 * @param in       encoded input
 * @param length   size of the input in bytes
 * @param utf8     destination buffer for UTF-8
 * @param utf8Max  capacity of the destination in bytes
 * @return the pair returned by DecodeUTF8Internal
 */
AuStreamReadWrittenPair_t STLCPToUTF8(ECodePage page, const void *in, AuUInt32 length, void *utf8, AuUInt32 utf8Max)
{
    AuStreamReadWrittenPair_t decoded = DecodeUTF8Internal(in, length, utf8, utf8Max, page);
    return decoded;
}
|
|
|
|
/**
 * UTF-8 -> code page entry point.
 * Reorders the arguments and forwards to EncodeUTF8Internal.
 *
 * @param page        target code page
 * @param utf8        UTF-8 input
 * @param utf8Length  size of the input in bytes
 * @param cp          destination buffer
 * @param cpLen       capacity of the destination in bytes
 * @return the pair returned by EncodeUTF8Internal
 */
AuStreamReadWrittenPair_t STLUTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen)
{
    AuStreamReadWrittenPair_t encoded = EncodeUTF8Internal(utf8, utf8Length, cp, cpLen, page);
    return encoded;
}
|
|
} |