AuroraRuntime/Source/Locale/Encoding/UTFn/AuUTF16.hpp

263 lines
6.5 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuUTF16.hpp
Date: 2021-10-31
Author: Reece
***/
#pragma once
#include "AuUTF8.hpp"
namespace Aurora::Locale::Encoding::UTF16
{
static void SwapU16(void *base, AuUInt32 count)
{
count *= 2;
for (AuUInt32 i = 0; i < count; i += 2)
{
AuWriteU16BE(base, i, AuReadU16LE(base, i));
}
}
static const AuUInt16 kHighSurrogateStart = 0xd800;
static const AuUInt16 kHighSurrogateEnd = 0xdbff;
static constexpr bool IsHighSurrogate(const AuUInt16 i)
{
return (i & AuUInt16(0xfffffc00)) == kHighSurrogateStart;
}
static const AuUInt16 kLowSurrogateStart = 0xdc00;
static const AuUInt16 kLowSurrogateEnd = 0xdfff;
static constexpr bool IsLowSurrogate(const AuUInt16 i)
{
return (i & AuUInt16(0xfffffc00)) == kLowSurrogateStart;
}
// Measures the little-endian UTF-16 sequence at the front of `in`.
//   in  - pointer to the first byte of the sequence
//   len - number of readable bytes at `in`
// Returns 2 when the first code unit is not a high surrogate, 4 when a high surrogate
// is followed by a low surrogate, and 0 when fewer bytes than required are available
// or the high surrogate is not followed by a low surrogate.
static int GetLenUC2CodePointLE(const AuUInt8 *in, AuUInt32 len)
{
    if (len >= 2)
    {
        // Any unit that is not a high surrogate is accepted on its own
        if (!IsHighSurrogate(AuReadU16LE(in, 0)))
        {
            return 2;
        }

        // A high surrogate needs a second unit, and that unit must be a low surrogate
        if ((len >= 4) && IsLowSurrogate(AuReadU16LE(in, 2)))
        {
            return 4;
        }
    }

    return 0;
}
// Big-endian counterpart of the UTF-16 sequence length probe.
//   in  - pointer to the first byte of the sequence
//   len - number of readable bytes at `in`
// Returns 2 for a leading unit that is not a high surrogate, 4 for a high surrogate
// followed by a low surrogate, and 0 if the buffer is too short or the pair is incomplete.
static int GetLenUC2CodePointBE(const AuUInt8 *in, AuUInt32 len)
{
    // Not even one 16-bit unit to look at
    if (len < 2)
    {
        return 0;
    }

    const bool bLeadIsHigh = IsHighSurrogate(AuReadU16BE(in, 0));
    if (!bLeadIsHigh)
    {
        // Single-unit sequence
        return 2;
    }

    // The trailing unit may only be read when four bytes are present
    const bool bPairComplete = (len >= 4) && IsLowSurrogate(AuReadU16BE(in, 2));
    return bPairComplete ? 4 : 0;
}
// Decodes one little-endian UTF-16 sequence from the front of `in`.
//   codepoint - receives the decoded value; left untouched when 0 is returned
//   in        - pointer to the first byte of the sequence
//   len       - number of readable bytes at `in`
// Returns the number of bytes consumed (2 or 4), or 0 when the buffer is too short
// or a high surrogate is not followed by a low surrogate.
static int GetLenUC2CodePoint(AuUInt32 &codepoint, const AuUInt8 *in, AuUInt32 len)
{
    if (len < 2)
    {
        return 0;
    }

    const auto uLead = AuReadU16LE(in, 0);
    if (IsHighSurrogate(uLead))
    {
        // Surrogate pair: a second unit must exist and must be a low surrogate
        if (len < 4)
        {
            return 0;
        }

        const auto uTrail = AuReadU16LE(in, 2);
        if (!IsLowSurrogate(uTrail))
        {
            return 0;
        }

        // Each surrogate carries 10 payload bits; the pair encodes (value - 0x10000)
        const AuUInt32 uUpperBits = AuUInt32(uLead & 0x3FFU) << 10;
        const AuUInt32 uLowerBits = AuUInt32(uTrail & 0x3FFU);
        codepoint = (uUpperBits | uLowerBits) + 0x10000;
        return 4;
    }

    // Single unit: its value is used as the code point directly
    codepoint = uLead;
    return 2;
}
// Converts little-endian UTF-16 held in `utf16` to UTF-8.
//   utf16 - input bytes; decoding stops at the first sequence GetLenUC2CodePoint rejects
//   utf8  - output view; when it evaluates to false nothing is written and the function
//           only sums UTF8::CountU8Overhead() for every decoded code point
// Returns {bytes of UTF-16 consumed, UTF-8 byte count written or measured}.
//
// When writing, a code point is counted as consumed only after UTF8::WriteCp accepted it.
// Previously the input cursor advanced before the write was attempted, so a failed write
// still reported its UTF-16 bytes as read and a caller resuming from the returned offset
// would silently drop that code point.
// NOTE(review): this assumes WriteCp emits nothing when it returns false - confirm.
static AuStreamReadWrittenPair_t CPToUTF8(const AuMemoryViewRead &utf16, const AuMemoryViewWrite &utf8)
{
    AuUInt32 cp {};
    AuUInt nextOffset {};
    AuUInt totalOffset {};
    AuUInt32 writeOffset {};

    const AuUInt8 *pItr = utf16.Begin<AuUInt8>();
    auto pcUtf8Itr = utf8.Begin<char>();
    auto length = utf16.length;

    if (utf8)
    {
        while ((nextOffset = GetLenUC2CodePoint(cp, pItr, length)) != 0)
        {
            // Emit first; only a successfully written code point is consumed from the input
            if (!UTF8::WriteCp(cp, pcUtf8Itr, writeOffset, utf8.length))
            {
                break;
            }

            length -= nextOffset;
            pItr += nextOffset;
            totalOffset += nextOffset;
        }
    }
    else
    {
        // Measuring pass: no output buffer, just accumulate the UTF-8 size
        while ((nextOffset = GetLenUC2CodePoint(cp, pItr, length)) != 0)
        {
            length -= nextOffset;
            pItr += nextOffset;
            totalOffset += nextOffset;
            writeOffset += UTF8::CountU8Overhead(cp);
        }
    }

    return {totalOffset, writeOffset};
}
// Converts UTF-8 held in `utf8` to UTF-16 code units stored through AuUInt16 pointers.
//   utf8  - input bytes, decoded through ReadUTF8IntoUTF32ByteString
//   utf16 - output view; when it evaluates to false nothing is written and only the
//           required UTF-16 size is computed
// Returns {first member of the decode result, UTF-16 bytes written or required}, or an
// empty pair when either decode pass reports zero for one of its counts or the
// temporary UTF-32 buffer cannot be allocated.
//
// Fixes over the previous revision:
//  * the "both counts non-zero" checks used bitwise AND (`first & second`), which is
//    zero for many valid combinations (e.g. 1 & 4) and rejected good input;
//  * the output bounds checks added byte counts (2 / 4) to a pointer the code writes
//    16-bit units through, demanding twice the space actually needed, so a buffer sized
//    by the counting mode could never be filled completely;
//  * an unused local was removed.
// NOTE(review): if the output runs out of space the first member still reports the full
// decode-pass count, not just the part that was transcoded - confirm callers expect this.
static AuStreamReadWrittenPair_t UTF8ToCp(const AuMemoryViewRead &utf8, const AuMemoryViewWrite &utf16)
{
    // Pass 1: measure how much UTF-32 storage the input needs
    auto pair = ReadUTF8IntoUTF32ByteString(utf8, {});
    if (!(pair.first && pair.second))
    {
        return {};
    }

    auto codepoints = pair.second / sizeof(AuUInt32);

    auto utf32Array = AuMakeSharedArray<AuUInt32>(codepoints);
    if (!utf32Array)
    {
        return {};
    }

    // Pass 2: decode into the temporary buffer
    // NOTE(review): the view is built with the element count as its length - confirm the
    // AuMemoryViewWrite(pointer, length) constructor does not expect a byte length here.
    auto pair2 = ReadUTF8IntoUTF32ByteString(utf8, {utf32Array.get(), codepoints});
    if (!(pair2.first && pair2.second))
    {
        return {};
    }

    auto ptr = utf32Array.get();
    AuUInt cpOffset {};

    if (!utf16)
    {
        // Measuring only: values up to 0xFFFF take one unit (2 bytes), the rest take a pair (4 bytes)
        for (AuUInt i = 0; i < codepoints; i++)
        {
            cpOffset += (ptr[i] <= 0xFFFFU) ? 2 : 4;
        }
        return {pair2.first, cpOffset};
    }

    auto out = utf16.Begin<AuUInt16>();
    auto end = utf16.End<AuUInt16>();

    for (AuUInt i = 0; i < codepoints; i++)
    {
        auto c = ptr[i];

        if (c <= 0xFFFFU)
        {
            // Room for one 16-bit unit is required (remaining space counted in units, not bytes)
            if ((end - out) < 1)
            {
                break;
            }

            *(out++) = AuUInt16(c);
            cpOffset += 2;
        }
        else
        {
            // Room for a full surrogate pair is required
            if ((end - out) < 2)
            {
                break;
            }

            c -= 0x10000U;
            *(out++) = AuUInt16(0xD800U | ((c >> 10) & 0x3FFU));
            *(out++) = AuUInt16(0xDC00U | (c & 0x3FFU));
            cpOffset += 4;
        }
    }

    return {pair2.first, cpOffset};
}
static int Count16(const void *base, AuUInt32 length, bool bytes = false, bool le = true)
{
AuUInt32 i {}, cps {};
for (; i < length; )
{
auto next = le ?
GetLenUC2CodePointLE(((const AuUInt8 *)base) + i, length - i) :
GetLenUC2CodePointBE(((const AuUInt8 *)base) + i, length - i);
if (next == 0)
{
return bytes ? i : cps;
}
if (i + next > length)
{
return bytes ? i : cps;
}
i += next;
cps++;
}
return bytes ? i : cps;
}
}