AuroraRuntime/Source/Locale/Encoding/UTFn/AuUTF16.hpp
J Reece Wilson fd0c5b51b2 Further Linux support
[+] Begin work on IO futexes for io release on process/thread exit
[+] Linux ::readdir iteration
[+] AuConsole buffering API
[*] Fix sleep as to not get interrupted by signals
[*] Switch the type of FS lock used under Linux
[*] Linux: Use new IPCHandle encoding scheme
[*] Fix undefined behaviour: uninitialized timeout values (AuLoop/Linux)
[*] Fix undefined behaviour: ConsoleTTY clear line was called with an uninitialized color (a random value from the stack)
[-] Remainings of std dir iterator
[*] Fix pthread_kill (aka send signal to pthread handle) always kills process. This is what you expect bc signal handler inheritance.
[*] Reformat the build Aurora.json file
[+] Added clang warning ignores to the build file
[*] Fix: UNIX need to use STDOUT_FILENO. Was using CRT handle in place of fd by mistake.
[+] Linux implementation for IO yield (AuIO::IOYield() - UNIX::LinuxOverlappedYield())
[*] Fix: Linux async end of stream processing. res 0 = zero bytes consumed. <= was detecting this as an error of code 0. Should succeed with zero bytes.
[+] Linux LoopQueue missing epilogue hook for the IO processor
[*] Various refactors and minor bug fixes
[*] Linux fix: Handle pipe EOS as zero
[*] Linux fix: thread termination via a user signal of 77. Need a force terminate.
[*] IPC handle: fix improper int to bool cast in the header setup within ToString
[*] Linux fix: HWInfo CPU topology regression
[-] Linux fix: remove SIGABRT handler
[*] Missing override in compression, exit, and consoletty headers.
[+] Unix Syslog logger backend
2022-08-02 05:52:57 +01:00

263 lines
6.5 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuUTF16.hpp
Date: 2021-10-31
Author: Reece
***/
#pragma once
#include "AuUTF8.hpp"
namespace Aurora::Locale::Encoding::UTF16
{
static void SwapU16(void *base, AuUInt32 count)
{
count *= 2;
for (AuUInt32 i = 0; i < count; i += 2)
{
AuWriteU16BE(base, i, AuReadU16LE(base, i));
}
}
static const AuUInt16 kHighSurrogateStart = 0xd800;
static const AuUInt16 kHighSurrogateEnd = 0xdbff;
static constexpr bool IsHighSurrogate(const AuUInt16 i)
{
return (i & AuUInt16(0xfffffc00)) == kHighSurrogateStart;
}
static const AuUInt16 kLowSurrogateStart = 0xdc00;
static const AuUInt16 kLowSurrogateEnd = 0xdfff;
static constexpr bool IsLowSurrogate(const AuUInt16 i)
{
return (i & AuUInt16(0xfffffc00)) == kLowSurrogateStart;
}
// Measures the code point at the head of a little-endian UTF-16 byte stream.
//
// in:  first byte of the stream
// len: number of readable bytes at `in`
//
// Returns 2 for a single-unit code point, 4 for a high/low surrogate pair, and
// 0 when the stream is too short or a high surrogate is not followed by a low
// surrogate. Note that any unit that is not a high surrogate - including an
// unpaired low surrogate - is reported as a 2 byte code point.
static int GetLenUC2CodePointLE(const AuUInt8 *in, AuUInt32 len)
{
    if (len >= 2)
    {
        // A lone unit: nothing more to inspect
        if (!IsHighSurrogate(AuReadU16LE(in, 0)))
        {
            return 2;
        }

        // A high surrogate needs a second unit, and it must be a low surrogate
        if ((len >= 4) && IsLowSurrogate(AuReadU16LE(in, 2)))
        {
            return 4;
        }
    }

    return 0;
}
// Measures the code point at the head of a big-endian UTF-16 byte stream.
//
// in:  first byte of the stream
// len: number of readable bytes at `in`
//
// Returns 2 for a single-unit code point, 4 for a high/low surrogate pair, and
// 0 when the stream is too short or a high surrogate is not followed by a low
// surrogate. Note that any unit that is not a high surrogate - including an
// unpaired low surrogate - is reported as a 2 byte code point.
static int GetLenUC2CodePointBE(const AuUInt8 *in, AuUInt32 len)
{
    if (len >= 2)
    {
        // A lone unit: nothing more to inspect
        if (!IsHighSurrogate(AuReadU16BE(in, 0)))
        {
            return 2;
        }

        // A high surrogate needs a second unit, and it must be a low surrogate
        if ((len >= 4) && IsLowSurrogate(AuReadU16BE(in, 2)))
        {
            return 4;
        }
    }

    return 0;
}
// Decodes the code point at the head of a little-endian UTF-16 byte stream.
//
// codepoint: receives the decoded value; left untouched when 0 is returned
// in:        first byte of the stream
// len:       number of readable bytes at `in`
//
// Returns the number of bytes the code point occupies (2 or 4), or 0 when the
// stream is too short or a high surrogate is not followed by a low surrogate.
// A unit that is not a high surrogate is passed through as-is, so an unpaired
// low surrogate is returned as its own value.
static int GetLenUC2CodePoint(AuUInt32 &codepoint, const AuUInt8 *in, AuUInt32 len)
{
    if (len < 2)
    {
        return 0;
    }

    const auto lead = AuReadU16LE(in, 0);

    if (IsHighSurrogate(lead))
    {
        // The trailing unit must be present...
        if (len < 4)
        {
            return 0;
        }

        // ...and must be a low surrogate
        const auto trail = AuReadU16LE(in, 2);
        if (IsLowSurrogate(trail))
        {
            // Each surrogate carries 10 payload bits; the pair encodes (cp - 0x10000)
            const AuUInt32 upperBits = AuUInt32(lead & 0x3FFU) << 10;
            const AuUInt32 lowerBits = AuUInt32(trail & 0x3FFU);
            codepoint = (upperBits | lowerBits) + 0x10000;
            return 4;
        }

        return 0;
    }

    // Single-unit code point
    codepoint = lead;
    return 2;
}
// Converts a UTF-16 byte stream (decoded with the little-endian reader
// GetLenUC2CodePoint) into UTF-8.
//
// utf16: source bytes
// utf8:  destination view. When the view tests false, nothing is written and
//        the function only measures how many UTF-8 bytes would be required.
//
// Returns {UTF-16 bytes consumed, UTF-8 bytes written (or required)}.
// Decoding stops at the first code point that cannot be decoded or, in write
// mode, at the first code point UTF8::WriteCp refuses to write.
static AuStreamReadWrittenPair_t CPToUTF8(const AuMemoryViewRead &utf16, const AuMemoryViewWrite &utf8)
{
    AuUInt32 cp {};
    AuUInt nextOffset {};
    AuUInt totalOffset {};
    AuUInt32 writeOffset {};

    const AuUInt8 *pItr = utf16.Begin<AuUInt8>();
    auto pcUtf8Itr = utf8.Begin<char>();
    auto length = utf16.length;

    if (utf8)
    {
        while ((nextOffset = GetLenUC2CodePoint(cp, pItr, length)))
        {
            // Write first and only then commit the input cursor: a code point that
            // was not written must not be reported as consumed, otherwise a caller
            // resuming from the returned read offset would silently drop it.
            // NOTE(review): assumes WriteCp leaves writeOffset untouched on failure - confirm.
            if (!UTF8::WriteCp(cp, pcUtf8Itr, writeOffset, utf8.length))
            {
                break;
            }

            length -= nextOffset;
            pItr += nextOffset;
            totalOffset += nextOffset;
        }
    }
    else
    {
        // Measuring pass: no destination, just tally the required UTF-8 bytes
        while ((nextOffset = GetLenUC2CodePoint(cp, pItr, length)))
        {
            length -= nextOffset;
            pItr += nextOffset;
            totalOffset += nextOffset;
            writeOffset += UTF8::CountU8Overhead(cp);
        }
    }

    return {totalOffset, writeOffset};
}
// Converts UTF-8 into UTF-16 code units (stored in host byte order through an
// AuUInt16 pointer), going through a temporary UTF-32 array.
//
// utf8:  source bytes
// utf16: destination view. When the view tests false, nothing is written and
//        the function only measures how many UTF-16 bytes would be required.
//
// Returns {UTF-8 bytes read by ReadUTF8IntoUTF32ByteString, UTF-16 bytes written
// (or required)}, or an empty pair when decoding yields nothing or the scratch
// array cannot be allocated.
// NOTE(review): when the destination fills up early, the first member still
// reports every UTF-8 byte that was decoded, not only the bytes whose code
// points were emitted - confirm whether callers rely on this for resumption.
static AuStreamReadWrittenPair_t UTF8ToCp(const AuMemoryViewRead &utf8, const AuMemoryViewWrite &utf16)
{
    // Pass 1: no destination, learn how much UTF-32 storage is needed
    auto pair = ReadUTF8IntoUTF32ByteString(utf8, {});

    // Both counts must be non-zero. This has to be a logical test: a bitwise AND of
    // two unrelated counts (for example 2 bytes read, 8 bytes written) is zero.
    if (!pair.first || !pair.second)
    {
        return {};
    }

    auto codepoints = pair.second / sizeof(AuUInt32);

    auto utf32Array = AuMakeSharedArray<AuUInt32>(codepoints);
    if (!utf32Array)
    {
        return {};
    }

    // Pass 2: decode into the scratch array
    // NOTE(review): the view is built with `codepoints` as its length; if the view
    // constructor expects a byte length this under-sizes the window - confirm.
    auto pair2 = ReadUTF8IntoUTF32ByteString(utf8, {utf32Array.get(), codepoints});
    if (!pair2.first || !pair2.second)
    {
        return {};
    }

    // Never walk past what the second pass actually produced
    auto decoded = pair2.second / sizeof(AuUInt32);
    if (decoded > codepoints)
    {
        decoded = codepoints;
    }

    auto ptr = utf32Array.get();

    if (!utf16)
    {
        // Measuring pass: BMP code points take one unit, everything else a surrogate pair
        AuUInt cpOffset {};
        for (AuUInt i = 0; i < decoded; i++)
        {
            cpOffset += (ptr[i] <= 0xFFFFU) ? 2 : 4;
        }
        return {pair2.first, cpOffset};
    }

    auto out = utf16.Begin<AuUInt16>();
    auto end = utf16.End<AuUInt16>();
    AuUInt cpOffset {};

    for (AuUInt i = 0; i < decoded; i++)
    {
        auto c = ptr[i];

        if (c <= 0xFFFFU)
        {
            // `out` counts 16-bit units, so one unit of room is `out + 1`
            if ((out + 1) > end)
            {
                break;
            }

            *(out++) = AuUInt16(c);
            cpOffset += 2;
        }
        else
        {
            // A surrogate pair needs two units of room
            if ((out + 2) > end)
            {
                break;
            }

            c -= 0x10000U;
            *(out++) = AuUInt16(0xD800U | ((c >> 10) & 0x3FFU));
            *(out++) = AuUInt16(0xDC00U | (c & 0x3FFU));
            cpOffset += 4;
        }
    }

    return {pair2.first, cpOffset};
}
static int Count16(const void *base, AuUInt32 length, bool bytes = false, bool le = true)
{
AuUInt32 i {}, cps {};
for (; i < length; )
{
auto next = le ?
GetLenUC2CodePointLE(((const AuUInt8 *)base) + i, length - i) :
GetLenUC2CodePointBE(((const AuUInt8 *)base) + i, length - i);
if (next == 0)
{
return bytes ? i : cps;
}
if (i + next > length)
{
return bytes ? i : cps;
}
i += next;
cps++;
}
return bytes ? i : cps;
}
}