Patch 1.5/2

This commit is contained in:
Reece Wilson 2021-09-06 14:03:45 +01:00
parent ba22b4573a
commit 02dc2d59cf
8 changed files with 145 additions and 73 deletions

View File

@ -46,11 +46,6 @@ namespace Aurora::IO::FS
return bytesWritten == length;
}
// Writes the contents of `str` to the file at `path`.
// Forwards the string's byte pointer and byte count to WriteFile and returns its result.
AUKN_SYM bool WriteString(const AuString &path, const AuString &str)
{
    const auto *bytes = str.data();
    const auto count = str.size();
    return WriteFile(path, bytes, count);
}
AUKN_SYM bool ReadFile(const AuString &path, AuList<AuUInt8> &buffer)
{
auto file = fopen(NormalizePathRet(path).c_str(), "rb");
@ -78,19 +73,6 @@ namespace Aurora::IO::FS
return bytesRead == size;
}
// Reads the file at `path` through ReadFile and stores its bytes in `buffer` as a string.
// Returns false, leaving `buffer` untouched, when ReadFile fails; true otherwise.
AUKN_SYM bool ReadString(const AuString &path, AuString &buffer)
{
    AuList<AuUInt8> bytes;

    const bool loaded = ReadFile(path, bytes);
    if (loaded)
    {
        // Rebuild the string from the raw byte range that was read.
        buffer = AuString(bytes.begin(), bytes.end());
    }

    return loaded;
}
AUKN_SYM bool FileExists(const AuString &path)
{
auto file = fopen(NormalizePathRet(path).c_str(), "rb");

View File

@ -204,7 +204,6 @@ namespace Aurora::IO::FS
}
}
AUKN_SYM bool ReadString(const AuString &path, AuString &buffer)
{
AuList<uint8_t> fileBuffer;

View File

@ -139,10 +139,18 @@ namespace Aurora::Locale::Encoding
return len;
}
AuStreamReadWrittenPair_t EncodeUTF8Internal(const void *utf8, AuUInt32 ut8Length, void *binary, AuUInt32 binaryLength, ECodePage page)
AuStreamReadWrittenPair_t EncodeUTF8Internal(const void *utf8, AuUInt32 utf8Length, void *binary, AuUInt32 binaryLength, ECodePage page)
{
AuStreamReadWrittenPair_t ret {};
auto readable = std::min(AuUInt(ut8Length), AuUInt(binaryLength));
auto readable = std::min(AuUInt(utf8Length), AuUInt(binaryLength));
AuList<AuUInt8> temp;
if (!binary)
{
temp.resize(utf8Length);
binary = temp.data();
binaryLength = temp.size();
}
switch (page)
{
@ -151,15 +159,18 @@ namespace Aurora::Locale::Encoding
return {};
case ECodePage::eUTF16:
case ECodePage::eUTF16BE:
ret = TranslateInUtfBuffer<Utf16Converter_t, char16_t>(reinterpret_cast<const AuUInt8 *>(utf8), ut8Length, reinterpret_cast<AuUInt8 *>(binary), binaryLength, page == ECodePage::eUTF16);
ret = TranslateInUtfBuffer<Utf16Converter_t, char16_t>(reinterpret_cast<const AuUInt8 *>(utf8), utf8Length, reinterpret_cast<AuUInt8 *>(binary), binaryLength, page == ECodePage::eUTF16);
break;
case ECodePage::eUTF32:
case ECodePage::eUTF32BE:
ret = TranslateInUtfBuffer<Utf32Converter_t, char32_t>(reinterpret_cast<const AuUInt8 *>(utf8), ut8Length, reinterpret_cast<AuUInt8 *>(binary), binaryLength, page == ECodePage::eUTF32);
ret = TranslateInUtfBuffer<Utf32Converter_t, char32_t>(reinterpret_cast<const AuUInt8 *>(utf8), utf8Length, reinterpret_cast<AuUInt8 *>(binary), binaryLength, page == ECodePage::eUTF32);
break;
case ECodePage::eUTF8:
if (utf8 && binary)
{
std::memcpy(binary, utf8, readable);
ret = AuMakePair(readable, readable);
}
ret = AuMakePair(utf8Length, binary ? binaryLength : utf8Length);
break;
}
@ -169,8 +180,16 @@ namespace Aurora::Locale::Encoding
AuStreamReadWrittenPair_t DecodeUTF8Internal(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utf8Max, ECodePage page)
{
AuStreamReadWrittenPair_t ret {};
AuList<AuUInt8> rw(reinterpret_cast<const AuUInt8 *>(binary), reinterpret_cast<const AuUInt8 *>(binary) + binaryLength);
AuList<AuUInt8> temp;
if (!utf8)
{
temp.resize(binaryLength * 4);
utf8 = temp.data();
utf8Max = temp.size();
}
AuList<AuUInt8> rw(reinterpret_cast<const AuUInt8 *>(binary), reinterpret_cast<const AuUInt8 *>(binary) + binaryLength);
auto readable = std::min(AuUInt(binaryLength), AuUInt(utf8Max));
switch (page)
@ -187,8 +206,11 @@ namespace Aurora::Locale::Encoding
ret = TranslateOutUtfBuffer<Utf32Converter_t, char32_t>(rw.data(), rw.size(), utf8, utf8Max, page == ECodePage::eUTF32);
break;
case ECodePage::eUTF8:
if (utf8 && binary)
{
std::memcpy(utf8, binary, readable);
ret = AuMakePair(readable, readable);
}
ret = AuMakePair(binaryLength, utf8 ? utf8Max : binaryLength);
break;
}
return ret;
@ -197,8 +219,16 @@ namespace Aurora::Locale::Encoding
AuStreamReadWrittenPair_t DecodeUTF8Internal(void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utf8Max, ECodePage page)
{
AuStreamReadWrittenPair_t ret {};
AuList<AuUInt8> rw(reinterpret_cast<const AuUInt8 *>(binary), reinterpret_cast<const AuUInt8 *>(binary) + binaryLength);
AuList<AuUInt8> temp;
if (!utf8)
{
temp.resize(binaryLength * 4);
utf8 = temp.data();
utf8Max = temp.size();
}
AuList<AuUInt8> rw(reinterpret_cast<const AuUInt8 *>(binary), reinterpret_cast<const AuUInt8 *>(binary) + binaryLength);
auto readable = std::min(AuUInt(binaryLength), AuUInt(utf8Max));
switch (page)
@ -215,8 +245,11 @@ namespace Aurora::Locale::Encoding
ret = TranslateOutUtfBuffer<Utf32Converter_t, char32_t>(rw.data(), rw.size(), utf8, utf8Max, page == ECodePage::eUTF32);
break;
case ECodePage::eUTF8:
if (utf8 && binary)
{
std::memcpy(utf8, binary, readable);
ret = AuMakePair(readable, readable);
}
ret = AuMakePair(binaryLength, utf8 ? utf8Max : binaryLength);
break;
}
return ret;

View File

@ -40,8 +40,11 @@ namespace Aurora::Locale::Encoding
(page == ECodePage::eUTF8))
{
auto readable = std::min(length, utf8Max);
if (utf8 && in)
{
std::memcpy(utf8, in, readable);
return {readable, readable};
}
return {length, utf8 ? utf8Max : length};
}
ret = Win32CPToUTF8(page, in, length, utf8, utf8Max);
@ -62,8 +65,11 @@ namespace Aurora::Locale::Encoding
(page == ECodePage::eUTF8))
{
auto readable = std::min(length, utf8Max);
if (utf8 && in)
{
std::memcpy(utf8, in, readable);
return {readable, readable};
}
return {length, utf8 ? utf8Max : length};
}
ret = Win32CPToUTF8(page, in, length, utf8, utf8Max);
@ -84,8 +90,11 @@ namespace Aurora::Locale::Encoding
(page == ECodePage::eUTF8))
{
auto readable = std::min(utf8Length, cpLen);
if (utf8 && cp)
{
std::memcpy(cp, utf8, readable);
return {readable, readable};
}
return {utf8Length, utf8 ? cpLen : utf8Length};
}
ret = Win32UTF8ToCp(page, utf8, utf8Length, cp, cpLen);

View File

@ -12,19 +12,6 @@
namespace Aurora::Locale::Encoding
{
// Drops trailing U+FFFD (replacement character) code units from the logical length of `blob`.
//   blob    - wide-character buffer to inspect; it is only read.
//   size    - in: number of code units in `blob`; out: length with trailing U+FFFD excluded.
//   removed - out: how many trailing U+FFFD code units were excluded.
// Returns true when at least one code unit was excluded.
static bool CountReplacements(const wchar_t *blob, AuUInt32 &size, AuUInt32 &removed)
{
    removed = 0;

    // Walk backwards from the end for as long as the last code unit is U+FFFD.
    for (; (size > 0) && (blob[size - 1] == L'\uFFFD'); --size)
    {
        ++removed;
    }

    return removed > 0;
}
static AuStreamReadWrittenPair_t Win32ConvertCpAToCPB(AuUInt32 cpA, AuUInt32 cpB, const void *in, AuUInt32 inLength, void *cpBlob, AuUInt32 cpLen)
{
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
@ -32,7 +19,6 @@ namespace Aurora::Locale::Encoding
{
return {};
}
// Get the UTF-16 character count of the cpA string in/inLength
auto chars = MultiByteToWideChar(cpA, 0, (LPCCH)in, inLength, NULL, 0);
@ -51,10 +37,7 @@ namespace Aurora::Locale::Encoding
// Convert the cpA buffer to UTF-16
MultiByteToWideChar(cpA, 0, (LPCCH)in, inLength, ret, chars);
// Ask me how much i like microsofts api
AuUInt32 utf16Recalc = chars;
AuUInt32 removed;
bool slowPath = CountReplacements(ret, utf16Recalc, removed);
if (!utf16Recalc)
{
@ -199,26 +182,16 @@ namespace Aurora::Locale::Encoding
// Convert the CP_UTF8 buffer to UTF-16
MultiByteToWideChar(CP_UTF8, 0, (LPCCH)in, inLength, (LPWSTR)utf16, chars);
// Ask me how much i like microsofts api
AuUInt32 utf16Recalc = chars;
AuUInt32 removed;
bool slowPath = CountReplacements((LPWSTR)utf16, utf16Recalc, removed);
if (!utf16Recalc)
{
return {};
}
// convert the shortened string with invalid surrogates back into a CP_UTF8 length
AuUInt32 read = inLength;
#if 0
if (slowPath)
#endif
{
read = WideCharToMultiByte(CP_UTF8, 0, (LPWSTR)utf16, utf16Recalc, NULL, 0, NULL, NULL);
read = WideCharToMultiByte(CP_UTF8, 0, (LPWSTR)utf16, chars, NULL, 0, NULL, NULL);
}
return {read, utf16Recalc};
return {read, chars};
#else
return {};
#endif

View File

@ -55,7 +55,8 @@ namespace Aurora::Locale::Encoding
AUKN_SYM AuStreamReadWrittenPair_t DecodeUTF8(const void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page)
{
out.resize(binaryLength);
auto aaa = DecodeUTF8(binary, binaryLength, nullptr, 0, page);
out.resize(aaa.second);
auto ret = DecodeUTF8(binary, binaryLength, out.data(), out.size(), page);
out.resize(ret.second);
return ret;
@ -63,7 +64,8 @@ namespace Aurora::Locale::Encoding
AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page)
{
out.resize(binaryLength);
auto aaa = DecodeUTF8(binary, binaryLength, nullptr, 0, page);
out.resize(aaa.second);
auto ret = DecodeUTF8(binary, binaryLength, out.data(), out.size(), page);
out.resize(ret.second);
return ret;

View File

@ -24,6 +24,69 @@ namespace Aurora::Locale::Encoding
using TypeIn_t = std::conditional_t<optimized, void *, const void *>;
using TypeCast_t = std::conditional_t<optimized, AuUInt8 *, const AuUInt8 *>;
// Returns the byte length of the Shift-JIS character starting at `in`.
//   in  - pointer to the first byte of the character.
//   len - number of bytes available starting at `in`.
// Returns 1 for a single-byte character, 2 for a double-byte character, and 0 when
// `len` is zero or the buffer ends before the trail byte of a double-byte character.
//
// Shift-JIS is a one- or two-byte encoding: only 0x81-0x9F and 0xE0-0xFC introduce a
// double-byte character. Everything else, including the half-width katakana range
// 0xA1-0xDF, is a single byte. UTF-8 style 3- and 4-byte lengths do not exist here.
static int GetLenSJISCodePoint(const AuUInt8 *in, AuUInt32 len)
{
    if (len == 0) return 0;

    const auto lead = in[0];
    const bool isLeadByte = ((lead >= 0x81) && (lead <= 0x9F)) ||
                            ((lead >= 0xE0) && (lead <= 0xFC));
    if (!isLeadByte) return 1;

    // A lead byte without its trail byte is an incomplete character.
    return (len < 2) ? 0 : 2;
}
static int GetLenSJISString(const AuUInt8 *in, AuUInt32 len)
{
AuUInt32 i;
for (i = 0; i < len; )
{
auto next = GetLenSJISCodePoint(in + i, len - i);
if (next == 0) return i;
i += next;
}
return i;
}
// Returns the byte length of the GBK character starting at `in`.
//   in  - pointer to the first byte of the character.
//   len - number of bytes available starting at `in`.
// Returns 1 for a single-byte character, 2 for a double-byte character, and 0 when
// `len` is zero or the buffer ends before the trail byte of a double-byte character.
//
// Only 0x81-0xFE are GBK lead bytes. 0x80 and 0xFF stand alone, so they must not be
// reported as the start of a two-byte sequence.
static int GetLenGBKCodePoint(const AuUInt8 *in, AuUInt32 len)
{
    if (len == 0) return 0;

    const auto lead = in[0];
    const bool isLeadByte = (lead >= 0x81) && (lead <= 0xFE);
    if (!isLeadByte) return 1;

    // A lead byte without its trail byte is an incomplete character.
    return (len < 2) ? 0 : 2;
}
static int GetLenGBKString(const AuUInt8 *in, AuUInt32 len)
{
AuUInt32 i;
for (i = 0; i < len; )
{
auto next = GetLenGBKCodePoint(in + i, len - i);
if (next == 0) return i;
i += next;
}
return i;
}
AuStreamReadWrittenPair_t EncodeUTF8(TypeIn_t binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utfLen)
{
int offset = 0;
@ -57,7 +120,18 @@ namespace Aurora::Locale::Encoding
return {};
}
auto real = state.CPToUTF8(reinterpret_cast<TypeCast_t>(binary) + offset, binaryLength - offset, utf8, utfLen);
binaryLength = binaryLength - offset;
// Clamp binaryLength so it covers only complete characters (GBK / SJIS) or complete
// 2-byte / 4-byte code units (UTF-16 / UTF-32, both byte orders).
if (page == ECodePage::eGBK)
    binaryLength = GetLenGBKString(reinterpret_cast<TypeCast_t>(binary) + offset, binaryLength);
else if (page == ECodePage::eSJIS)
    binaryLength = GetLenSJISString(reinterpret_cast<TypeCast_t>(binary) + offset, binaryLength);
else if ((page == ECodePage::eUTF16) || (page == ECodePage::eUTF16BE))
    binaryLength &= ~1;
// The big-endian variant must be rounded down as well; testing eUTF32 twice skipped it.
else if ((page == ECodePage::eUTF32) || (page == ECodePage::eUTF32BE))
    binaryLength &= ~3;
auto real = state.CPToUTF8(reinterpret_cast<TypeCast_t>(binary) + offset, binaryLength, utf8, utfLen);
return AuMakePair(real.first + offset, real.second);
}