AuroraRuntime/Source/AuProcAddresses.Linux.cpp
J Reece Wilson 7cc41f1359 [*] Fix: axboe is a piss baby retard who cant even write a thread safe ring buffer
This is the dude tasked with managing Linshits IO stack, including the fundamentally flawed muh zero-syscall pissring (#a), the fucked io_submit apis that never worked properly (incl io_cancel that spuriously returns EINVAL despite all magics matching); the retard who stuck to Linus's idea that MUH O_DIRECT IS LE EVIL YOU MUST USE MY CACHES REEE REEE (#b); and the retard who got indirectly called out by Linus as being apart of the database maintainers who Linus didn't/doesnt like and who wanted these APIs in the first place (#c).

 #a
 This dumb cunt can't even write a ring buffer. He thinks the solution to everything is spawn a thread, like the glibc and libuv retards.
 He seems to think a ring buffer also magically hooks into a thread scheduler, and something something, orange lingo, muh zero syscall overhead as if the kernel should be/is constantly checking every thread for an arbitrary amount of IO ring buffers.

 #b
"The whole notion of "direct IO" is totally braindamaged. Just say no.

	This is your brain: O
	This is your brain on O_DIRECT: .

	Any questions?

I should have fought back harder. There really is no valid reason for EVER
using O_DIRECT. You need a buffer whatever IO you do, and it might as well
be the page cache. There are better ways to control the page cache than
play games and think that a page cache isn't necessary.

So don't use O_DIRECT."
 - Commie Torbals, maintainer of toy academic kernels

 #c
"AIO is a horrible ad-hoc design, with the main excuse being "other, less gifted people, made that design, and we are implementing it for compatibility because database people - who seldom have any shred of taste - actually use it". But AIO was always really really ugly."
 - Commie Torbals, maintainer of toy academic kernels

"Jens works for Oracle"
 - https://web.archive.org/web/20130402030321/https://events.linuxfoundation.org/jls06b

And that's not to mention Google and NodeJS has disabled the usage of his pissring implementation because of shit like this. His shit code has been and will continue to be a security risk. I can't blame them. This retard mustn't be trusted with so much as an 8 year olds lemonade stand.
2024-08-05 05:39:50 +01:00

426 lines
12 KiB
C++

/***
Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuProcAddresses.Linux.cpp
Date: 2023-8-11
Author: Reece
***/
#include <RuntimeInternal.hpp>
#include <sys/syscall.h>
#include <linux/futex.h>
#include <Time/Time.hpp>
#include <dlfcn.h>
#define AURORA_IS_GLIBC
#if !defined(__NR_close_range)
#define __NR_close_range 436
#endif
namespace Aurora
{
static const auto kAioRingMagic = 0xa10a10a1u;
void InitLinuxAddresses()
{
pgetsockname = getsockname;
#if defined(RTLD_NEXT)
p__cxa_throw = (decltype(p__cxa_throw))dlsym(RTLD_NEXT, "__cxa_throw");
p_Unwind_RaiseException = (decltype(p_Unwind_RaiseException))dlsym(RTLD_NEXT, "_Unwind_RaiseException");
#endif
#if defined(RTLD_DEFAULT)
if (!p_Unwind_RaiseException)
{
p_Unwind_RaiseException = (decltype(p_Unwind_RaiseException))dlsym(RTLD_DEFAULT, "_Unwind_RaiseException");
}
if (!p__cxa_throw)
{
p__cxa_throw = (decltype(p__cxa_throw))dlsym(RTLD_DEFAULT, "__cxa_throw");
}
pgai_error = (decltype(pgai_error))dlsym(RTLD_DEFAULT, "gai_error");
pgai_cancel = (decltype(pgai_cancel))dlsym(RTLD_DEFAULT, "gai_cancel");
pgetaddrinfo_a = (decltype(pgetaddrinfo_a))dlsym(RTLD_DEFAULT, "getaddrinfo_a");
#endif
}
template <typename... T>
long syscallFuckYou(T &&... args)
{
// sysdeps/unix/sysv/linux/x86_64/syscall.S
// Fuck Freetards
long iFuckResult = syscall(AuForward<T &&>(args)...);
#if defined(AURORA_IS_GLIBC)
if (iFuckResult == -1 &&
errno > 0)
{
return (0 - errno);
}
#else
// TODO: if defined UNIX has a libc wrapper. why would we assume there's a CRT to begin with?
errno = (0 - iFuckResult);
#endif
return iFuckResult;
// Imagine going out of your way to define a varadic syscall wrapper that works without any special formatting parameters,
// works across all abis, just to fuck it into uselessness by returning -ENOSYS and -1 spuriously.
// Nooo we cant just have all the != 0 and if (n >= 0) checks pass, we must enforce `error == -1` is true everywhere as a convention!!!
// People trying to interface with the kernel directly must never know what the kernel actually said!!!
// Look through all their (GNU) garbage hand written assembly *and* C macros, you'll see its litered with SYSCALL_ERROR_LABEL,
// because i swear to god it looks like cmpq and jae are the only instructions they know how to use.
// Worse, its rationalized as and I quote,
// "Linus said he will make sure the no syscall returns a value in -1 .. -4095 as a valid result so we can safely test with -4095"
// So these copy/pasted instructions are hard-coding a "linus said so once"-based test, and now Linux-like OSes are forever limited to 4k errors?
// I guess it's like the other FUTEX issue where every single thead and every single process under Linux is bound to one ABI defined by glib,
// and the kernels stance on the matter is, I quote "must only be changed if the change is first communicated with the glibc folks."
// That meaning, it doesn't matter because they're just going to half-ass things together holding hands.
// Daily reminder, Lin-shit is half-assed HW abstraction layer held together with forced driver source sharing and glibc+freeedesktop hopes and dreams.
// Fucking retards, I swear
}
int pidfd_getfd(int pidfd, int targetfd,
unsigned int flags)
{
return syscallFuckYou(__NR_pidfd_getfd, pidfd, targetfd, flags);
}
int pidfd_open(pid_t pid, unsigned int flags)
{
return syscallFuckYou(__NR_pidfd_open, pid, flags);
}
long set_robust_list(struct robust_list_head *head, size_t len)
{
return syscallFuckYou(__NR_set_robust_list, head, len);
}
long get_robust_list(int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
{
return syscallFuckYou(__NR_get_robust_list, pid, head_ptr, len_ptr);
}
static int futex(uint32_t *uaddr, int futex_op, uint32_t val,
const struct timespec *timeout,
uint32_t *uaddr2, uint32_t val3)
{
return syscallFuckYou(__NR_futex, uaddr, futex_op, val, timeout, uaddr2, val3);
}
int futex_wait(uint32_t *addr, uint32_t expected)
{
return futex(addr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected, 0, 0, 0);
}
int futex_wait(uint32_t *addr, uint32_t expected, const struct timespec *timeout)
{
if (timeout)
{
return futex(addr, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG, expected, timeout, 0, FUTEX_BITSET_MATCH_ANY);
}
else
{
return futex(addr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected, timeout, 0, 0);
}
}
int futex_wait_shared(uint32_t *addr, uint32_t expected, const struct timespec *timeout)
{
if (timeout)
{
return futex(addr, FUTEX_WAIT_BITSET, expected, timeout, 0, FUTEX_BITSET_MATCH_ANY);
}
else
{
return futex(addr, FUTEX_WAIT, expected, timeout, 0, 0);
}
}
int futex_wake(uint32_t *addr, uint32_t nthreads)
{
return futex(addr, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, nthreads, 0, 0, 0);
}
int futex_wake_shared(uint32_t *addr, uint32_t nthreads)
{
return futex(addr, FUTEX_WAKE, nthreads, 0, 0, 0);
}
int futex_wait(volatile uint32_t *addr, uint32_t expected)
{
return futex((uint32_t *)addr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected, 0, 0, 0);
}
int futex_wait(volatile uint32_t *addr, uint32_t expected, const struct timespec *timeout)
{
if (timeout)
{
return futex((uint32_t *)addr, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG, expected, timeout, 0, FUTEX_BITSET_MATCH_ANY);
}
else
{
return futex((uint32_t *)addr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected, timeout, 0, 0);
}
}
int futex_wake(volatile uint32_t *addr, uint32_t nthreads)
{
return futex((uint32_t *)addr, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, nthreads, 0, 0, 0);
}
int io_setup(unsigned nr, aio_context_t *ctxp)
{
return syscallFuckYou(__NR_io_setup, nr, ctxp);
}
int io_destroy(aio_context_t ctx)
{
return syscallFuckYou(__NR_io_destroy, ctx);
}
int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
return syscallFuckYou(__NR_io_submit, ctx, nr, iocbpp);
}
#if 0
int io_getevents(aio_context_t ctx, long min_nr, long max_nr,
struct io_event *events,
struct timespec *timeout)
{
return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout);
}
#endif
struct aio_ring
{
unsigned id;
unsigned nr;
unsigned head;
unsigned tail;
unsigned magic;
unsigned compat_features;
unsigned incompat_features;
unsigned header_length;
struct io_event events[0];
};
int io_getevents(aio_context_t ctx,
long min_nr, long max_nr,
struct io_event *events,
struct timespec *timeout)
{
int i {};
auto pRing = (struct aio_ring *)ctx;
if (!pRing ||
pRing->magic != kAioRingMagic)
{
goto do_syscall;
}
while (i < max_nr)
{
auto head = AuAtomicLoad(&pRing->head);
if (head == pRing->tail)
{
break;
}
events[i++] = pRing->events[head];
auto nextHead = (head + 1) % pRing->nr;
if (AuAtomicCompareExchange(&pRing->head,
nextHead,
head) != head)
{
i--;
}
}
if (!i &&
timeout &&
!timeout->tv_sec &&
!timeout->tv_nsec)
{
return 0;
}
if (i &&
i >= min_nr)
{
return i;
}
do_syscall:
int iKernelCount {};
if ((iKernelCount = syscallFuckYou(__NR_io_getevents,
ctx, min_nr - i,
max_nr - i,
&events[i], timeout)) > 0)
{
return i + iKernelCount;
}
else if (i)
{
return i;
}
else
{
return iKernelCount;
}
}
int io_cancel(aio_context_t ctx_id, struct iocb *iocb,
struct io_event *result)
{
return syscallFuckYou(__NR_io_cancel, ctx_id, iocb, result);
}
ssize_t sys_getrandom(void *pBuffer, size_t uLength)
{
static AuUInt32 gShouldNotGetRand {};
ssize_t ret {};
if (gShouldNotGetRand)
{
return -ENOSYS;
}
ret = syscallFuckYou(__NR_getrandom, pBuffer, uLength, 1);
if (ret == -ENOSYS)
{
gShouldNotGetRand = 1;
}
return ret;
}
int close_range(unsigned int first, unsigned int last,
unsigned int flags)
{
auto &platform = AuSwInfo::GetPlatformInfo();
if (platform.uKernelMajor > 5 ||
(platform.uKernelMajor == 5 && platform.uKernelMajor >= 9))
{
return syscallFuckYou(__NR_close_range, first, last, flags);
}
else
{
return -ENOSYS;
}
}
bool SysNativeWaitOnAddressFutexSupported()
{
return true;
}
bool SysWaitOnAddressNoTimed(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
int ret {};
#if defined(AU_CPU_ENDIAN_BIG)
if (uWordSize == 8)
{
pTargetAddress = AuReinterpretCast<const char *>(pTargetAddress) + 4;
pCompareAddress = AuReinterpretCast<const char *>(pCompareAddress) + 4;
}
#endif
auto uCurrent = *(AuUInt32 *)pCompareAddress;
do
{
ret = futex_wait((AuUInt32 *)pTargetAddress, uCurrent, nullptr);
if (ret == 0)
{
continue;
}
if (ret == -EAGAIN)
{
continue;
}
if (ret == -ETIMEDOUT)
{
return false;
}
}
while (ret == -EINTR);
return AuMemcmp(pCompareAddress, pTargetAddress, uWordSize) != 0;
}
bool SysWaitOnAddressTimed(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 uAbsTimeSteadyClock,
AuUInt64 uRelativeNanoseconds,
AuOptional<AuUInt64> uAbsTimeAltClock,
bool bSpun)
{
int ret {};
#if defined(AU_CPU_ENDIAN_BIG)
if (uWordSize == 8)
{
pTargetAddress = AuReinterpretCast<const char *>(pTargetAddress) + 4;
pCompareAddress = AuReinterpretCast<const char *>(pCompareAddress) + 4;
}
#endif
auto uCurrent = *(AuUInt32 *)pCompareAddress;
struct timespec tspec;
Time::monoabsns2ts(&tspec, uAbsTimeAltClock ? uAbsTimeAltClock.value() : uAbsTimeSteadyClock);
do
{
ret = futex_wait((AuUInt32 *)pTargetAddress, uCurrent, &tspec);
if (ret == 0)
{
continue;
}
if (ret == -EAGAIN)
{
continue;
}
if (ret == -ETIMEDOUT)
{
return false;
}
}
while (ret == -EINTR);
return AuMemcmp(pCompareAddress, pTargetAddress, uWordSize) != 0;
}
void SysWakeNOnAddress(const void *pAddress,
AuUInt32 dwCount)
{
futex_wake((AuUInt32 *)pAddress, dwCount);
}
void SysWakeAllOnAddress(const void *pAddress)
{
futex_wake((AuUInt32 *)pAddress, INT_MAX);
}
void SysWakeOneOnAddress(const void *pAddress)
{
futex_wake((AuUInt32 *)pAddress, 1);
}
}