[*] Linux build regressions, and shrink the size of Linux RWLocks to 48 bytes from 64

This commit is contained in:
Reece Wilson 2024-05-07 14:41:16 +01:00
parent 8e1c74a5df
commit 631624dc55
13 changed files with 186 additions and 39 deletions

View File

@ -78,8 +78,8 @@ namespace Aurora::Memory
pFuckCppRetardsFixYourWorthlessSpec(pFuckCppRetardsFixYourWorthlessSpec)
{ }
template <class T>
inline CppHeapWrapper(const CppHeapWrapper<T> &fuckCpp)
template <class B>
inline CppHeapWrapper(const CppHeapWrapper<B> &fuckCpp)
{
this->pFuckCppRetardsFixYourWorthlessSpec = (CppHeapWrapper *)&fuckCpp;
detail::AccessorICantEven::Set(fuckCpp, this);
@ -281,6 +281,12 @@ namespace Aurora::Memory
}
}
// Equality across wrapper instantiations: two wrappers compare equal when
// GetHeapRaw() yields the same value for both, whatever element type each
// wrapper was instantiated with.
// Const-qualified so it can be called on const wrappers and so the operator is
// symmetric when C++20 considers the reversed-operand candidate.
template <class Z>
bool operator==(const Aurora::Memory::CppHeapWrapper<Z> &rhs) const noexcept
{
    return this->GetHeapRaw() == rhs.GetHeapRaw();
}
private:
// should be sizeof(void *) * 4 = [pHeap, pControlBlock, pParent, pSingleThreadChild]
// or not; it doesn't matter.

View File

@ -39,7 +39,7 @@ namespace Aurora::Threading::Primitives
static const auto kPrimitiveSize64LinuxSemaphore = 16;
static const auto kPrimitiveSize64LinuxCS = 32;
static const auto kPrimitiveSize64LinuxEvent = 32;
static const auto kPrimitiveSize64LinuxRWLock = 64;
static const auto kPrimitiveSize64LinuxRWLock = 48;
static const auto kPrimitiveSize64LinuxCond = 32;
static const auto kPrimitiveSize64LinuxCondMutex = 16;
@ -48,7 +48,7 @@ namespace Aurora::Threading::Primitives
static const auto kPrimitiveSize32LinuxSemaphore = 12;
static const auto kPrimitiveSize32LinuxCS = 32;
static const auto kPrimitiveSize32LinuxEvent = 32;
static const auto kPrimitiveSize32LinuxRWLock = 64;
static const auto kPrimitiveSize32LinuxRWLock = 48;
static const auto kPrimitiveSize32LinuxCond = 32;
static const auto kPrimitiveSize32LinuxCondMutex = 12;

View File

@ -36,7 +36,7 @@ namespace Aurora::Console::ConsoleTTY
return true;
}
static void TTYWrite(const AuString &in)
static void TTYWrite(const AuROString &in)
{
if (in.empty())
{

View File

@ -26,13 +26,13 @@ namespace Aurora::IO::FS
{
bool _MkDir(const AuROString &path)
{
AuString subdir;
AuROString subdir;
if ((path.size() > 1) &&
((path[path.size() - 1] == '/') ||
(path[path.size() - 1] == '\\')))
{
subdir = path.substr(0, path.size() - 1);
subdir = AuROString(path).substr(0, path.size() - 1);
}
else
{
@ -44,12 +44,12 @@ namespace Aurora::IO::FS
mode_t mode { 0775 };
struct stat s;
if (::stat(subdir.c_str(), &s) != -1)
if (::stat(AuString(subdir).c_str(), &s) != -1)
{
mode = s.st_mode;
}
return ::mkdir(path.c_str(), mode) == 0;
return ::mkdir(AuString(path).c_str(), mode) == 0;
}
struct ReadDirStructure : IReadDir
@ -304,7 +304,7 @@ namespace Aurora::IO::FS
return true;
}
static bool UnixExists(const AuROString &path, bool dir)
static bool UnixExists(const AuString &path, bool dir)
{
struct stat s;
int err = ::stat(path.c_str(), &s);

View File

@ -433,7 +433,7 @@ namespace Aurora::IO::FS
AuSafeDelete<PosixFileStream *>(that);
}
static IFileStream *OpenNewEx(const AuString &path,
static IFileStream *OpenNewEx(const AuROString &path,
EFileOpenMode openMode,
EFileAdvisoryLockLevel lock,
bool bCheck)

View File

@ -26,13 +26,13 @@ namespace Aurora::IO::FS
return {};
}
AuString subdir;
AuROString subdir;
if ((path.size() > 1) &&
((path[path.size() - 1] == '/') ||
(path[path.size() - 1] == '\\')))
{
subdir = srcPath.substr(0, srcPath.size() - 1);
subdir = AuROString(srcPath).substr(0, srcPath.size() - 1);
}
else
{
@ -41,24 +41,24 @@ namespace Aurora::IO::FS
GoUpToSeparator(subdir, subdir);
subdir = "file:///" + subdir;
AuString subdirStr = AuString("file:///") + AuString(subdir);
if (::setxattr(srcPath.c_str(), "user.xdg.origin.url", subdir.c_str(), subdir.size(), XATTR_CREATE) == -1)
if (::setxattr(srcPath.c_str(), "user.xdg.origin.url", subdirStr.c_str(), subdirStr.size(), XATTR_CREATE) == -1)
{
if (errno == EEXIST)
{
if (::setxattr(srcPath.c_str(), "user.xdg.origin.url", subdir.c_str(), subdir.size(), XATTR_REPLACE) == -1)
if (::setxattr(srcPath.c_str(), "user.xdg.origin.url", subdirStr.c_str(), subdirStr.size(), XATTR_REPLACE) == -1)
{
return false;
}
}
}
if (::setxattr(srcPath.c_str(), "user.xdg.referrer.url", subdir.c_str(), subdir.size(), XATTR_CREATE) == -1)
if (::setxattr(srcPath.c_str(), "user.xdg.referrer.url", subdirStr.c_str(), subdirStr.size(), XATTR_CREATE) == -1)
{
if (errno == EEXIST)
{
if (::setxattr(srcPath.c_str(), "user.xdg.referrer.url", subdir.c_str(), subdir.size(), XATTR_REPLACE) == -1)
if (::setxattr(srcPath.c_str(), "user.xdg.referrer.url", subdirStr.c_str(), subdirStr.size(), XATTR_REPLACE) == -1)
{
return false;
}

View File

@ -60,7 +60,7 @@ namespace Aurora::Locale
WideCharToMultiByte(CP_UTF8, 0, in, length, ret.data(), ret.size(), NULL, NULL);
return ret;
#elif !defined(AU_NO_CPPLOCALE)
return gUtf8Conv.to_bytes(std::wstring(in, wcsnlen(in, length)));
return AuString(gUtf8Conv.to_bytes(std::wstring(in, wcsnlen(in, length))));
#else
SysPushErrorUnimplemented("ConvertFromWChar");
return {};
@ -93,7 +93,7 @@ namespace Aurora::Locale
MultiByteToWideChar(CP_UTF8, 0, in.data(), in.length(), ret.data(), ret.size());
return ret;
#elif !defined(AU_NO_CPPLOCALE)
return gUtf8Conv.from_bytes(in);
return gUtf8Conv.from_bytes(in.data(), in.data() + in.size());
#else
SysPushErrorUnimplemented("ConvertFromUTF8");
return {};

View File

@ -76,11 +76,11 @@ namespace Aurora::Process
}
Sections sections;
AuParse::SplitNewlines(map, [&](const AuString &line)
AuParse::SplitNewlines(map, [&](const AuROString &line)
{
bool bIsSpecialFile {};
char *endPtr;
auto base = strtoll(line.c_str(), &endPtr, 16);
auto base = strtoll(line.data(), &endPtr, 16);
if (errno == ERANGE) return;
if (*endPtr != '-') return;
@ -185,7 +185,7 @@ namespace Aurora::Process
refMod->moduleMeta = AuMakeShared<ModuleMeta>();
if (!refMod->moduleMeta) return;
AuString fileName;
AuROString fileName;
AuIOFS::GetFileFromPath(fileName, name);
refMod->moduleMeta->moduleBase = base;
refMod->moduleMeta->moduleName = fileName;
@ -217,7 +217,7 @@ namespace Aurora::Process
{
auto & [path, sections] = pair;
AuString file;
AuROString file;
AuIOFS::GetFileFromPath(file, path);
auto object = AuIOFS::OpenReadUnique(path);
if (!object)

View File

@ -54,7 +54,7 @@ namespace Aurora::SWInfo
osInfo.uKernelPatch = strtoll(endPtr, &endPtr, 10);
}
static bool ProcessVersionLine(OSInformation &osInfo,const AuString &line);
static bool ProcessVersionLine(OSInformation &osInfo, const AuROString &line);
static void ParseOSRel(OSInformation &osInfo)
{
@ -70,13 +70,13 @@ namespace Aurora::SWInfo
AuReplaceAll(osRel, "\"", "");
AuList<AuString> versionNumbers;
AuList<AuROString> versionNumbers;
Parse::SplitNewlines(osRel, [&](const AuString &line)
Parse::SplitNewlines(osRel, [&](const AuROString &line)
{
if (AuStartsWith(line, "NAME="))
{
gUserlandBrand = line.substr(5);
gUserlandBrand = AuString(line.substr(5));
}
else if (AuStartsWith(line, "VERSION="))
{
@ -92,7 +92,7 @@ namespace Aurora::SWInfo
}
});
std::sort(versionNumbers.begin(), versionNumbers.end(), [](const AuString &str, const AuString &strb)
// Order the candidates longest-first.
// std::sort requires a strict weak ordering, i.e. comp(a, a) must be false.
// '>=' violates that for equal-length entries and makes the sort undefined
// behaviour, so compare with '>' instead.
std::sort(versionNumbers.begin(), versionNumbers.end(), [](const AuROString &str, const AuROString &strb)
{
    return str.length() > strb.length();
});
@ -114,7 +114,7 @@ namespace Aurora::SWInfo
osInfo.bIsServer |= osRel.find("erver") != AuString::npos;
}
static bool ProcessVersionLineParts(OSInformation &osInfo, const AuString &line)
static bool ProcessVersionLineParts(OSInformation &osInfo, const AuROString &line)
{
auto firstPeriod = line.find(".");
if (firstPeriod == AuString::npos)
@ -123,7 +123,7 @@ namespace Aurora::SWInfo
}
auto ptr = line.data();
char *endPtr = (char *)line.c_str() + line.size();
char *endPtr = (char *)line.data() + line.size();
osInfo.uUserlandMajor = strtoll(ptr, &endPtr, 10);
if (errno == ERANGE) return false;
@ -141,7 +141,7 @@ namespace Aurora::SWInfo
return true;
}
static bool ProcessVersionLine(OSInformation &osInfo, const AuString &line)
static bool ProcessVersionLine(OSInformation &osInfo, const AuROString &line)
{
auto parts = AuSplitString(line, " ");

View File

@ -11,6 +11,10 @@
#include "SMTYield.hpp"
#include "../AuWakeInternal.hpp"
#if defined(AURORA_RWLOCK_IS_CONDLESS)
#define gUseFutexRWLock 1
#endif
namespace Aurora::Threading::Primitives
{
/* static const AuUInt8 ... = ...; */ #define kOffsetOfRead AuOffsetOf(&RWLockImpl<true>::read_)
@ -125,6 +129,7 @@ namespace Aurora::Threading::Primitives
{
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
template<bool bIsWriteRecursionAllowed>
ConditionVariableInternal &RWLockImpl<bIsWriteRecursionAllowed>::GetCondition()
{
@ -144,6 +149,7 @@ namespace Aurora::Threading::Primitives
return this->conditionWriter_;
#endif
}
#endif
template<bool bIsWriteRecursionAllowed>
AuUInt32 *RWLockImpl<bIsWriteRecursionAllowed>::GetFutexCondition()
@ -154,13 +160,21 @@ namespace Aurora::Threading::Primitives
template<bool bIsWriteRecursionAllowed>
AuUInt32 *RWLockImpl<bIsWriteRecursionAllowed>::GetFutexConditionWriter()
{
#if defined(AURORA_RWLOCK_IS_CONDLESS)
return (AuUInt32 *)&this->uSemaphore_;
#else
return (AuUInt32 *)this->conditionVariableWriter_;
#endif
}
template<bool bIsWriteRecursionAllowed>
AuUInt32 *RWLockImpl<bIsWriteRecursionAllowed>::GetReadSleepCounter()
{
#if defined(AURORA_RWLOCK_IS_CONDLESS)
return (AuUInt32 *)&this->uRDCounter_;
#else
return (AuUInt32 *)this->conditionVariable_;
#endif
}
template<bool bIsWriteRecursionAllowed>
@ -193,6 +207,7 @@ namespace Aurora::Threading::Primitives
return false;
}
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
else
{
RWLockAcquire;
@ -222,6 +237,7 @@ namespace Aurora::Threading::Primitives
}
}
}
#endif
}
}
while (iCurState < 0 ||
@ -267,6 +283,7 @@ namespace Aurora::Threading::Primitives
return false;
}
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
else
{
RWLockAcquire;
@ -296,6 +313,7 @@ namespace Aurora::Threading::Primitives
}
}
}
#endif
}
}
while (iCurState < 0 ||
@ -342,12 +360,14 @@ namespace Aurora::Threading::Primitives
{
bRet = this->LockWriteNSAbsUnlocked(uTimeout);
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
else
{
RWLockAcquire;
bRet = this->LockWriteNSAbsUnlocked(uTimeout);
}
#endif
AuAtomicSub(&this->dwWritersPending_, 1);
@ -402,6 +422,7 @@ namespace Aurora::Threading::Primitives
bRet = this->LockWriteNSAbsUnlocked(uEndTime);
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
else
{
uEndTime = uTimeout ? AuTime::SteadyClockNS() + uTimeout : 0;
@ -410,6 +431,7 @@ namespace Aurora::Threading::Primitives
bRet = this->LockWriteNSAbsUnlocked(uEndTime);
}
#endif
AuAtomicSub(&this->dwWritersPending_, 1);
@ -496,6 +518,7 @@ namespace Aurora::Threading::Primitives
this->FutexWriterWake();
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
else
{
if (qwTimeoutNS)
@ -521,6 +544,7 @@ namespace Aurora::Threading::Primitives
continue;
}
}
#endif
}
return true;
@ -545,11 +569,13 @@ namespace Aurora::Threading::Primitives
{
InternalLTSWakeOne((const void *)&this->iState_);
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
else
{
RWLockBarrierSelf();
this->GetCondition().Signal();
}
#endif
}
template<bool bIsWriteRecursionAllowed>
@ -561,11 +587,13 @@ namespace Aurora::Threading::Primitives
AuAtomicAdd(pThat, 1u);
InternalLTSWakeOne(pThat); // Note: all paths check for a waiter ahead of time!
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
else
{
RWLockBarrierSelf();
this->GetConditionWriter().Signal();
}
#endif
}
template<bool bIsWriteRecursionAllowed>
@ -578,11 +606,13 @@ namespace Aurora::Threading::Primitives
InternalLTSWakeAll((const void *)&this->iState_);
}
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
else
{
RWLockBarrierSelf();
this->GetCondition().Broadcast();
}
#endif
}
template<bool bIsWriteRecursionAllowed>
@ -597,11 +627,13 @@ namespace Aurora::Threading::Primitives
InternalLTSWakeCount(pThat, uCount);
}
}
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
else
{
RWLockBarrierSelf();
this->GetConditionWriter().Broadcast();
}
#endif
}
template<bool bIsWriteRecursionAllowed>
@ -741,6 +773,7 @@ namespace Aurora::Threading::Primitives
auto uEndTime = uTimeout ? AuTime::SteadyClockNS() + uTimeout : 0;
#if !defined(AURORA_RWLOCK_IS_CONDLESS)
if (!gUseFutexRWLock)
{
AuAtomicAdd(&this->dwWritersPending_, 1);
@ -776,6 +809,7 @@ namespace Aurora::Threading::Primitives
return this->UpgradeReadToWriteDoUpgrade();
}
else
#endif
{
while (true)
{

View File

@ -11,6 +11,28 @@
#include "AuConditionMutex.Generic.hpp"
#include "ThreadCookie.hpp"
#if defined(AURORA_IS_LINUX_DERIVED)
// Disable Windows XP - 7 keyedevent inline optimization (not that we couldn't use AURORA_RWLOCK_IS_CONDLESS on XP/7 with WOA_ALWAYS_DUMB_OS_TARGET).
// Note that *a lot* of platforms may end up requiring all thread primitives to use more verbose primitives.
// Keeping the condvar impl around is best in the event we need to optimize for unique platforms of sensitive schedulers.
// The noisy use-case of the semaphore hash table isn't what we always want for persistent objects with QoS properties.
// In terms of platform support/what-to-do-where:
// Here's one example of a QoS sensitive OS without a futex wait queue interface we might care about: > MACOS <
// Now let's ignore the updoooters and support Windows XP - 7...
// ...this is a Linux specific thing
//
// Windows XP-7: same as 11 (no such win32 alternative) (EOL support)
// Windows 11 updoooters can cope with a 56-byte primitive in the worst case scenario (who even cares case?)
// Generic: 64-byte (no config, maybe use AURORA_RWLOCK_IS_CONDLESS) (expected case)
// Linux: AURORA_RWLOCK_IS_CONDLESS best case, 64-byte without (best case)
// AURORA_RWLOCK_IS_CONDLESS: 48-byte (best case)
// vs couple hundred byte primitives in the STL
// in either case AURORA_RWLOCK_IS_CONDLESS doesnt matter so much (delta linux:16, win32: measly 8 bytes)
// in either case performance is the same
#define AURORA_RWLOCK_IS_CONDLESS
#endif
namespace Aurora::Threading::Primitives
{
template<bool bIsWriteRecursionAllowed>
@ -109,6 +131,11 @@ namespace Aurora::Threading::Primitives
RWLockAccessView<false, RWLockImpl> write_;
private:
ThreadCookie_t reentrantWriteLockHandle_ {};
#if defined(AURORA_RWLOCK_IS_CONDLESS)
AuAUInt32 uSemaphore_ {};
AuAUInt32 uRDCounter_ {};
#else
ConditionMutexInternal mutex_;
#if defined(AURWLOCK_NO_SIZE_OPTIMIZED_CONDVAR)
ConditionVariableInternal condition_;
@ -117,8 +144,7 @@ namespace Aurora::Threading::Primitives
char conditionVariable_[kSizeOfDummyCondVar] {};
char conditionVariableWriter_[kSizeOfDummyCondVar] {};
#endif
ThreadCookie_t reentrantWriteLockHandle_ {};
#endif
AuAInt32 iState_ {};
AuAInt32 dwWritersPending_ {};
};

View File

@ -7,6 +7,77 @@
***/
#pragma once
#if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)) && \
!defined(AURORA_COMPILER_MSVC) && \
!defined(AURORA_COMPILER_INTEL) && \
!defined(AURORA_A_GOOD_COMPILER_PLS)
// Even if clang (and gcc) has these intrins available, you must enable them globally, unlike see for some fucking reason.
// I mean, we can do runtime branching around SSE4 paths no problem. Why all of a sudden am i being gated out of the intrins im electing to use by hand?
// No, you (the compiler) may not use these in your baseline feature set (or incl in stl locks). Yes, i still want them. Now fuck off.
// If these end up being wrong, blame clang and gnu for being cunts, not me.
// Hand-encoded stand-in for the _mm_monitorx intrinsic, for compilers that
// gate the intrinsic behind a global target-feature switch.
// Emits the raw bytes 0F 01 FA (the MONITORX encoding) with the operands
// pinned to fixed registers by the constraints below:
//   __p          -> EAX/RAX ("a"): address to monitor
//   __extensions -> ECX     ("c")
//   __hints      -> EDX     ("d")
// No CPU feature check happens here; the caller must only reach this on
// hardware that implements the instruction.
static auline void __mm_monitorx(void * __p, unsigned __extensions, unsigned __hints)
{
// No outputs; inputs only.
asm volatile(".byte 0x0f, 0x01, 0xfa;" :
: "a"(__p),
"c"(__extensions),
"d"(__hints));
}
// Hand-encoded stand-in for the _mm_mwaitx intrinsic.
// Emits the raw bytes 0F 01 FB (the MWAITX encoding) with the operands pinned
// to fixed registers by the constraints below:
//   __hints      -> EAX ("a")
//   __clock      -> EBX ("b"): timeout value
//   __extensions -> ECX ("c")
// Note that the parameter order (extensions, hints, clock) differs from the
// register order; the constraints do the mapping.
// No CPU feature check happens here; the caller must only reach this on
// hardware that implements the instruction.
static auline void __mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)
{
// No outputs; inputs only.
asm volatile(".byte 0x0f, 0x01, 0xfb;" :
: "a"(__hints),
"b"(__clock),
"c"(__extensions));
}
// Hand-encoded stand-in for the _umonitor intrinsic (WAITPKG): arms address
// monitoring on __address for a following __umwait.
//
// UMONITOR is encoded as F3 0F AE /6 and only exists in register-direct form
// (ModRM.mod = 11b). The "a" constraint pins the address to RAX/EAX, so the
// ModRM byte must be 11 110 000b = 0xF0. A ModRM of 0x01 is a memory-form
// encoding ([rcx], /0) and does not decode as UMONITOR.
//
// No CPU feature check happens here; the caller must only reach this on
// hardware that implements the instruction.
static auline void __umonitor(void * __address)
{
    __asm__ volatile(".byte 0xF3, 0x0F, 0xAE, 0xF0;" :
                     : "a"(__address)
                     : );
}
// Hand-encoded stand-in for the _umwait intrinsic (WAITPKG).
// Emits F2 0F AE F1, i.e. UMWAIT with its control operand in ECX, and passes
// the 64-bit __counter split across EDX:EAX (high:low).
//   __control -> ECX     ("c")
//   __counter -> EDX:EAX ("d" = high 32 bits, "a" = low 32 bits)
// Returns the carry flag as captured by SETB right after the instruction.
// Per the Intel SDM, CF reports that the wait ended because the OS-imposed
// time limit expired.
// No CPU feature check happens here; the caller must only reach this on
// hardware that implements the instruction.
static auline unsigned char __umwait(unsigned int __control, unsigned long long __counter)
{
// Split the 64-bit deadline into the two 32-bit halves the instruction reads.
AuUInt32 uTimeHi = AuUInt32(__counter >> 32);
AuUInt32 uTimeLo = AuUInt32(__counter & 0xffffffff);
char flag;
// SETB must directly follow the instruction so it observes the CF it produced.
__asm__ volatile(".byte 0xF2, 0x0F, 0xAE, 0xF1\n"
"setb %0"
: "=r"(flag)
: "a"(uTimeLo),
"d"(uTimeHi),
"c"(__control)
: );
return flag;
}
// Hand-encoded stand-in for the _tpause intrinsic (WAITPKG).
// Emits 66 0F AE F1, i.e. TPAUSE with its control operand in ECX, and passes
// the 64-bit __counter split across EDX:EAX (high:low).
//   __control -> ECX     ("c")
//   __counter -> EDX:EAX ("d" = high 32 bits, "a" = low 32 bits)
// Returns the carry flag as captured by SETB right after the instruction.
// Per the Intel SDM, CF reports that the pause ended because the OS-imposed
// time limit expired.
// No CPU feature check happens here; the caller must only reach this on
// hardware that implements the instruction.
static auline unsigned char __tpause(unsigned int __control, unsigned long long __counter)
{
    // Split the 64-bit deadline into the two 32-bit halves the instruction reads.
    AuUInt32 uTimeHi = AuUInt32(__counter >> 32);
    AuUInt32 uTimeLo = AuUInt32(__counter & 0xffffffff);
    char flag;
    // EAX must carry the LOW half of the deadline (same layout as __umwait).
    // Loading the high half into both registers corrupts the low 32 bits.
    // SETB must directly follow the instruction so it observes the CF it produced.
    __asm__ volatile(".byte 0x66, 0x0F, 0xAE, 0xF1\n"
                     "setb %0"
                     : "=r"(flag)
                     : "a"(uTimeLo),
                       "d"(uTimeHi),
                       "c"(__control)
                     : );
    return flag;
}
#define _mm_monitorx __mm_monitorx
#define _mm_mwaitx __mm_mwaitx
#define _umonitor __umonitor
#define _umwait __umwait
#define _tpause __tpause
#endif
namespace Aurora::Threading
{
inline AuUInt32 gHasThreadLocalTimeout {};
@ -252,7 +323,7 @@ namespace Aurora::Threading::Primitives
}
else
{
_mm_monitorx(&kMassiveBlock, 0, 0);
_mm_monitorx((void *)&kMassiveBlock, 0U, 0U);
_mm_mwaitx(kMWAITXUseTSC, 0, uCount);
bRet = callback();
}
@ -479,7 +550,7 @@ namespace Aurora::Threading::Primitives
}
else if (ThrdCfg::gIsZen3OrGreater)
{
_mm_monitorx((void *)pWord, 0, 0);
_mm_monitorx((void *)pWord, 0U, 0U);
if (callback())
{

View File

@ -12,6 +12,8 @@ extern "C"
{
AuUInt32 GetCurrentThreadIDFast();
}
#elif defined(AURORA_IS_LINUX_DERIVED) && defined(AURORA_ARCH_X64)
unsigned long long __readfsqword(unsigned int) noexcept;
#endif
namespace Aurora::Threading::Primitives
@ -25,15 +27,23 @@ namespace Aurora::Threading::Primitives
static auline ThreadCookie_t GetThreadCookie()
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
#if defined(AURORA_IS_32BIT)
#if defined(AURORA_ARCH_X86)
__asm {
mov eax, fs:[0x18]
mov eax, [eax + 0x24]
ret
}
#else
#elif defined(AURORA_ARCH_X64)
return ::GetCurrentThreadIDFast();
#else
return GetCurrentThreadId();
#endif
#elif defined(AURORA_IS_LINUX_DERIVED) && defined(AURORA_ARCH_X64)
// _readfsbase_u64 is the modern gcc/intel/msft version of __readgsqword that we need.
// glibc uses fs[0] = "self" with a note pthreads doesnt use this, fs[1]=dtv counter,fs[2]=self (see: tcbhead_t)
// musl uses fs[0] = pSelf, dtv counter, * unused *, * unused *
return (ThreadCookie_t)__readfsqword(0);
//return (ThreadCookie_t)_readfsbase_u64(0);
#elif defined(AURORA_IS_POSIX_DERIVED)
// pthread_self is usually a C-friendly macro in a header that reads from a segment
// though the result is always a pointer wide