J Reece Wilson
fd0c5b51b2
[+] Begin work on IO futexes for io release on process/thread exit [+] Linux ::readdir iteration [+] AuConsole buffering API [*] Fix sleep as to not get interrupted by signals [*] Switch the type of FS lock used under Linux [*] Linux: Use new IPCHandle encoding scheme [*] Fix undefined behaviour: unintialized timeout values (AuLoop/Linux) [*] Fix undefined behaviour: ConsoleTTY clear line was called of a color of a random value on stack [-] Remainings of std dir iterator [*] Fix pthread_kill (aka send signal to pthread handle) always kills process. This is what you expect bc signal handler inheritance. [*] Reformat the build Aurora.json file [+] Added clang warning ignores to the build file [*] Fix: UNIX need to use STDOUT_FILENO. Was using CRT handle in place of fd by mistake. [+] Linux implementation for IO yield (AuIO::IOYield() - UNIX::LinuxOverlappedYield()) [*] Fix: Linux async end of stream processing. res 0 = zero bytes consumed. <= was detecting this as an error of code 0. Should succeed with zero bytes. [+] Linux LoopQueue missing epilogue hook for the IO processor [*] Various refactors and minor bug fixes [*] Linux fix: Handle pipe EOS as zero [*] Linux fix: thread termination via a user signal of 77. Need a force terminate. [*] IPC handle: fix improper int to bool cast in the header setup within ToString [*] Linux fix: HWInfo CPU topology regression [-] Linux fix: remove SIGABRT handler [*] Missing override in compression, exit, and consoletty headers. [+] Unix Syslog logger backend
263 lines
6.5 KiB
C++
263 lines
6.5 KiB
C++
/***
    Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: AuUTF16.hpp
    Date: 2021-10-31
    Author: Reece
***/
|
|
#pragma once
|
|
|
|
#include "AuUTF8.hpp"
|
|
|
|
namespace Aurora::Locale::Encoding::UTF16
|
|
{
|
|
static void SwapU16(void *base, AuUInt32 count)
|
|
{
|
|
count *= 2;
|
|
for (AuUInt32 i = 0; i < count; i += 2)
|
|
{
|
|
AuWriteU16BE(base, i, AuReadU16LE(base, i));
|
|
}
|
|
}
|
|
|
|
static const AuUInt16 kHighSurrogateStart = 0xd800;
|
|
static const AuUInt16 kHighSurrogateEnd = 0xdbff;
|
|
static constexpr bool IsHighSurrogate(const AuUInt16 i)
|
|
{
|
|
return (i & AuUInt16(0xfffffc00)) == kHighSurrogateStart;
|
|
}
|
|
|
|
static const AuUInt16 kLowSurrogateStart = 0xdc00;
|
|
static const AuUInt16 kLowSurrogateEnd = 0xdfff;
|
|
static constexpr bool IsLowSurrogate(const AuUInt16 i)
|
|
{
|
|
return (i & AuUInt16(0xfffffc00)) == kLowSurrogateStart;
|
|
}
|
|
|
|
static int GetLenUC2CodePointLE(const AuUInt8 *in, AuUInt32 len)
|
|
{
|
|
// Check for at least one U16 word
|
|
if (len < 2)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
// Neeto, we found a codepoint in range
|
|
if (!IsHighSurrogate(AuReadU16LE(in, 0)))
|
|
{
|
|
return 2;
|
|
}
|
|
|
|
// Check we have enough stream overhead to consume 4 bytes
|
|
if (len < 4)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
return IsLowSurrogate(AuReadU16LE(in, 2)) ? 4 : 0;
|
|
}
|
|
|
|
static int GetLenUC2CodePointBE(const AuUInt8 *in, AuUInt32 len)
|
|
{
|
|
// Check for at least one U16 word
|
|
if (len < 2)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
// Neeto, we found a codepoint in range
|
|
if (!IsHighSurrogate(AuReadU16BE(in, 0)))
|
|
{
|
|
return 2;
|
|
}
|
|
|
|
// Check we have enough stream overhead to consume 4 bytes
|
|
if (len < 4)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
return IsLowSurrogate(AuReadU16BE(in, 2)) ? 4 : 0;
|
|
}
|
|
|
|
// Decodes one code point from the start of `in`, reading 16-bit units with
// AuReadU16LE. `len` is the number of bytes available.
//
// On success the value is stored in `codepoint` and the number of bytes it
// occupied (2 or 4) is returned. On failure 0 is returned and `codepoint` is
// left untouched; failure means fewer than 2 bytes were available, or a high
// surrogate was truncated or not followed by a low surrogate.
static int GetLenUC2CodePoint(AuUInt32 &codepoint, const AuUInt8 *in, AuUInt32 len)
{
    if (len < 2)
    {
        return 0;
    }

    const auto lead = AuReadU16LE(in, 0);

    if (IsHighSurrogate(lead))
    {
        // A surrogate pair occupies two units
        if (len < 4)
        {
            return 0;
        }

        const auto trail = AuReadU16LE(in, 2);
        if (!IsLowSurrogate(trail))
        {
            return 0;
        }

        // Ten payload bits come from each half; the pair encodes (value - 0x10000)
        const AuUInt32 upperBits = AuUInt32(lead & 0x3FFU) << 10;
        const AuUInt32 lowerBits = AuUInt32(trail & 0x3FFU);
        codepoint = (upperBits | lowerBits) + 0x10000;
        return 4;
    }

    // Single unit: the unit value is the code point
    codepoint = lead;
    return 2;
}
|
|
|
|
// Converts the UTF-16 data in `utf16` (units read little-endian by
// GetLenUC2CodePoint) into UTF-8.
//
// When `utf8` is a usable view, each decoded code point is emitted through
// UTF8::WriteCp. When it is not, nothing is written and the output size is
// accumulated with UTF8::CountU8Overhead instead.
//
// Returns {bytes of UTF-16 consumed, bytes of UTF-8 written or counted}.
// Conversion stops at the first sequence GetLenUC2CodePoint rejects, or when
// UTF8::WriteCp reports failure (presumably because the output view is full).
static AuStreamReadWrittenPair_t CPToUTF8(const AuMemoryViewRead &utf16, const AuMemoryViewWrite &utf8)
{
    AuUInt32 cp {};
    AuUInt nextOffset {};
    AuUInt totalOffset {};
    AuUInt32 writeOffset {};

    const AuUInt8 *pItr = utf16.Begin<AuUInt8>();
    auto pcUtf8Itr = utf8.Begin<char>();
    auto length = utf16.length;

    const bool bHasOutput = utf8 ? true : false;

    while ((nextOffset = GetLenUC2CodePoint(cp, pItr, length)))
    {
        if (bHasOutput)
        {
            // Emit first. The input cursor must only move past a code point
            // that was actually written, otherwise the returned read count
            // would cover input the caller never received and it would be lost.
            if (!UTF8::WriteCp(cp, pcUtf8Itr, writeOffset, utf8.length))
            {
                break;
            }
        }
        else
        {
            writeOffset += UTF8::CountU8Overhead(cp);
        }

        length -= nextOffset;
        pItr += nextOffset;
        totalOffset += nextOffset;
    }

    return {totalOffset, writeOffset};
}
|
|
|
|
// Converts the UTF-8 data in `utf8` into UTF-16 stored in native AuUInt16 units.
//
// The input is first decoded into a temporary UTF-32 array by
// ReadUTF8IntoUTF32ByteString (once to measure, once to fill). When `utf16`
// is not a usable view nothing is written and only the required size is
// computed; otherwise code points are written until the input is exhausted or
// the next code point no longer fits.
//
// Returns {bytes of UTF-8 consumed, bytes of UTF-16 written or required}.
// An empty pair is returned when decoding yields nothing or the temporary
// array cannot be allocated.
static AuStreamReadWrittenPair_t UTF8ToCp(const AuMemoryViewRead &utf8, const AuMemoryViewWrite &utf16)
{
    // Pass 1: decode against an empty view to learn how many UTF-32 bytes the input yields
    auto pair = ReadUTF8IntoUTF32ByteString(utf8, {});

    // Both counts must be non-zero. This has to be a logical test: a bitwise
    // AND of the two counts is zero for ordinary input such as 1 byte read
    // and 4 bytes produced, which would reject valid text.
    if (!(pair.first && pair.second))
    {
        return {};
    }

    auto codepoints = pair.second / sizeof(AuUInt32);

    auto utf32Array = AuMakeSharedArray<AuUInt32>(codepoints);
    if (!utf32Array)
    {
        return {};
    }

    // Pass 2: decode into the temporary array.
    // NOTE(review): the view length passed here is the code point count; if the
    // view constructor expects a byte length this should be
    // codepoints * sizeof(AuUInt32) - confirm against AuMemoryViewWrite.
    auto pair2 = ReadUTF8IntoUTF32ByteString(utf8, {utf32Array.get(), codepoints});

    if (!(pair2.first && pair2.second))
    {
        return {};
    }

    // Never walk past what pass 2 reported as decoded
    auto decoded = pair2.second / sizeof(AuUInt32);
    if (decoded < codepoints)
    {
        codepoints = decoded;
    }

    auto ptr = utf32Array.get();

    if (!utf16)
    {
        // Measuring only: 2 bytes per BMP code point, 4 per surrogate pair
        AuUInt required {};

        for (AuUInt i = 0; i < codepoints; i++)
        {
            required += (ptr[i] <= 0xFFFFU) ? 2 : 4;
        }

        return {pair2.first, required};
    }

    auto out = utf16.Begin<AuUInt16>();
    auto end = utf16.End<AuUInt16>();

    AuUInt cpOffset {}; // bytes of UTF-16 written so far
    AuUInt consumed {}; // bytes of UTF-8 matching the code points written so far
    AuUInt i {};

    for (; i < codepoints; i++)
    {
        auto c = ptr[i];

        if (c <= 0xFFFFU)
        {
            // `out` and `end` are AuUInt16 pointers, so the room check is in
            // units: one unit for a BMP code point.
            if ((end - out) < 1)
            {
                break;
            }

            *(out++) = AuUInt16(c);
            cpOffset += 2;

            // Assumes the decoder only accepts shortest-form UTF-8 - TODO confirm
            consumed += (c < 0x80U) ? 1 : ((c < 0x800U) ? 2 : 3);
        }
        else
        {
            // Two units for a surrogate pair; never emit half a pair
            if ((end - out) < 2)
            {
                break;
            }

            c -= 0x10000U;
            auto high = AuUInt16(0xD800U | ((c >> 10) & 0x3FFU));
            auto low = AuUInt16(0xDC00U | (c & 0x3FFU));

            *(out++) = high;
            *(out++) = low;
            cpOffset += 4;

            consumed += 4;
        }
    }

    // If everything decoded was written, report the decoder's own read count.
    // If the output filled up first, report only the input that corresponds to
    // the code points actually written so the caller can resume from there.
    AuUInt readBytes = (i == codepoints) ? AuUInt(pair2.first) : consumed;

    return {readBytes, cpOffset};
}
|
|
|
|
static int Count16(const void *base, AuUInt32 length, bool bytes = false, bool le = true)
|
|
{
|
|
AuUInt32 i {}, cps {};
|
|
|
|
for (; i < length; )
|
|
{
|
|
auto next = le ?
|
|
GetLenUC2CodePointLE(((const AuUInt8 *)base) + i, length - i) :
|
|
GetLenUC2CodePointBE(((const AuUInt8 *)base) + i, length - i);
|
|
if (next == 0)
|
|
{
|
|
return bytes ? i : cps;
|
|
}
|
|
|
|
if (i + next > length)
|
|
{
|
|
return bytes ? i : cps;
|
|
}
|
|
|
|
i += next;
|
|
cps++;
|
|
}
|
|
|
|
return bytes ? i : cps;
|
|
}
|
|
} |