AuroraRuntime/Source/AuProcAddresses.Linux.cpp
Jamie Reece Wilson b5c4271807 [*] Some WTF
[*] Use waitpid instead of wait3 (BSD, now deprecated by POSIX)
[*] Win32 / x86_32: I guess the file map maximum size should be the same as 64bit since it lets us? we need this span constraint to be the entire file or less.
[*] Improved AuProcess UNIX signal safety
[*] Comments
[+] Secret API: RuntimeCollectMemory
2024-10-12 16:30:12 +01:00

463 lines
17 KiB
C++

/***
Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuProcAddresses.Linux.cpp
Date: 2023-8-11
Author: Reece
***/
#include <RuntimeInternal.hpp>
#include <sys/syscall.h>
#include <linux/futex.h>
#include <Time/Time.hpp>
#include <dlfcn.h>
// Selects the glibc branch of the syscall wrapper in this file: syscall() results of -1 are
// translated back into a negated errno value.
#define AURORA_IS_GLIBC
// Fallback for toolchain headers that do not define the close_range syscall number.
// NOTE(review): 436 is presumably the architecture-generic number - confirm for every target ABI.
#if !defined(__NR_close_range)
#define __NR_close_range 436
#endif
namespace Aurora
{
// Signature expected in aio_ring::magic before the userspace fast path of io_getevents
// trusts the memory behind an aio_context_t.
static const unsigned int kAioRingMagic = 0xa10a10a1u;
// Populates the optional function pointers used by the runtime.
//
// getsockname is bound directly. The C++ exception entry points are looked up with
// RTLD_NEXT first (when available) and RTLD_DEFAULT as a fallback; the getaddrinfo_a
// family is looked up through RTLD_DEFAULT only. Any pointer dlsym cannot resolve is
// left null.
void InitLinuxAddresses()
{
    pgetsockname = getsockname;

#if defined(RTLD_NEXT)
    // Prefer the definition that follows this module in the lookup order.
    p__cxa_throw = reinterpret_cast<decltype(p__cxa_throw)>(dlsym(RTLD_NEXT, "__cxa_throw"));
    p_Unwind_RaiseException = reinterpret_cast<decltype(p_Unwind_RaiseException)>(dlsym(RTLD_NEXT, "_Unwind_RaiseException"));
#endif

#if defined(RTLD_DEFAULT)
    // Fall back to the global scope for anything still missing.
    if (p_Unwind_RaiseException == nullptr)
    {
        p_Unwind_RaiseException = reinterpret_cast<decltype(p_Unwind_RaiseException)>(dlsym(RTLD_DEFAULT, "_Unwind_RaiseException"));
    }

    if (p__cxa_throw == nullptr)
    {
        p__cxa_throw = reinterpret_cast<decltype(p__cxa_throw)>(dlsym(RTLD_DEFAULT, "__cxa_throw"));
    }

    // Asynchronous name resolution entry points, resolved at runtime.
    pgai_error = reinterpret_cast<decltype(pgai_error)>(dlsym(RTLD_DEFAULT, "gai_error"));
    pgai_cancel = reinterpret_cast<decltype(pgai_cancel)>(dlsym(RTLD_DEFAULT, "gai_cancel"));
    pgetaddrinfo_a = reinterpret_cast<decltype(pgetaddrinfo_a)>(dlsym(RTLD_DEFAULT, "getaddrinfo_a"));
#endif
}
// Variadic front-end to the libc syscall() function that reports failures the way the
// kernel does: as a negated errno value instead of the libc "-1 plus errno" convention.
//
// With AURORA_IS_GLIBC defined, a -1 result accompanied by a positive errno is rewritten
// to -errno; every other result is passed through untouched.
// Without it, the raw result is returned and errno is unconditionally set to its negation.
template <typename... T>
long syscallFuckYou(T &&... args)
{
    // xref: glibc://sysdeps/unix/sysv/linux/x86_64/syscall.S
    const long lKernelRet = syscall(AuForward<T &&>(args)...);

#if defined(AURORA_IS_GLIBC)
    const bool bLibcReportedError = (lKernelRet == -1) && (errno > 0);
    return bLibcReportedError ? (0 - errno) : lKernelRet;
#else
    // TODO: if defined UNIX has a libc wrapper. why would we assume there's a CRT to begin with?
    errno = (0 - lKernelRet);
    return lKernelRet;
#endif

    // Imagine going out of your way to define a variadic syscall wrapper that works without any special formatting parameters,
    // works across all abis, just to fuck it into uselessness by returning -ENOSYS and -1 spuriously.
    // Nooo we cant just have all the != 0 and if (n >= 0) checks pass, we must enforce `error == -1` is true everywhere as a convention!!!
    // People trying to interface with the kernel directly must never know what the kernel actually said!!!
    // Look through all their (GNU) garbage hand written assembly *and* C macros, you'll see it's littered with SYSCALL_ERROR_LABEL,
    // because i swear to god it looks like cmpq and jae are the only instructions they know how to use.
    // Worse, it's rationalized as and I quote,
    // "Linus said he will make sure the no syscall returns a value in -1 .. -4095 as a valid result so we can safely test with -4095"
    // So these copy/pasted instructions are hard-coding a "linus said so once"-based test, and now Linux-like OSes are forever limited to 4k errors?
    // I guess it's like the other FUTEX issue where every single thread and every single process under Linux is bound to one ABI defined by glibc,
    // and the kernel's stance on the matter is, I quote "must only be changed if the change is first communicated with the glibc folks."
    // That meaning, it doesn't matter because they're just going to half-ass things together holding hands.
    // Daily reminder, Linshit is half-assed HW abstraction layer held together with forced driver source sharing and glibc+freedesktop hopes and dreams.
}
// Issues the pidfd_getfd system call with the given arguments, unchanged.
// Returns the wrapper's result narrowed to int (negated errno on failure under the glibc path).
int pidfd_getfd(int pidfd, int targetfd,
                unsigned int flags)
{
    const long lStatus = syscallFuckYou(__NR_pidfd_getfd, pidfd, targetfd, flags);
    return lStatus;
}
// Issues the pidfd_open system call for `pid` with `flags`.
// Returns the wrapper's result narrowed to int (negated errno on failure under the glibc path).
int pidfd_open(pid_t pid, unsigned int flags)
{
    const long lStatus = syscallFuckYou(__NR_pidfd_open, pid, flags);
    return lStatus;
}
// Issues the set_robust_list system call with the supplied list head and length.
// Returns the wrapper's result as-is.
long set_robust_list(struct robust_list_head *head, size_t len)
{
    const long lStatus = syscallFuckYou(__NR_set_robust_list, head, len);
    return lStatus;
}
// Issues the get_robust_list system call for `pid`; the kernel writes through
// `head_ptr` and `len_ptr`. Returns the wrapper's result as-is.
long get_robust_list(int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
{
    const long lStatus = syscallFuckYou(__NR_get_robust_list, pid, head_ptr, len_ptr);
    return lStatus;
}
// File-local entry point for the raw futex system call; every futex_* helper in this
// file funnels through here. All six arguments are forwarded in order.
// Returns the wrapper's result narrowed to int.
static int futex(uint32_t *uaddr, int futex_op, uint32_t val,
                 const struct timespec *timeout,
                 uint32_t *uaddr2, uint32_t val3)
{
    const long lStatus = syscallFuckYou(__NR_futex, uaddr, futex_op, val, timeout, uaddr2, val3);
    return lStatus;
}
// Process-private FUTEX_WAIT on `addr` with no timeout, using `expected` as the
// comparison value. Returns the result of the futex call.
int futex_wait(uint32_t *addr, uint32_t expected)
{
    const int iOperation = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
    return futex(addr, iOperation, expected, 0, 0, 0);
}
// Process-private futex wait with an optional timeout.
// With a timeout, FUTEX_WAIT_BITSET is used with the match-any bitset; without one,
// plain FUTEX_WAIT is used. Returns the result of the futex call.
int futex_wait(uint32_t *addr, uint32_t expected, const struct timespec *timeout)
{
    if (!timeout)
    {
        // No deadline: plain private wait (timeout is null here).
        return futex(addr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected, timeout, 0, 0);
    }

    return futex(addr, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG, expected, timeout, 0, FUTEX_BITSET_MATCH_ANY);
}
// Futex wait without FUTEX_PRIVATE_FLAG, with an optional timeout.
// With a timeout, FUTEX_WAIT_BITSET is used with the match-any bitset; without one,
// plain FUTEX_WAIT is used. Returns the result of the futex call.
int futex_wait_shared(uint32_t *addr, uint32_t expected, const struct timespec *timeout)
{
    if (!timeout)
    {
        // No deadline: plain wait (timeout is null here).
        return futex(addr, FUTEX_WAIT, expected, timeout, 0, 0);
    }

    return futex(addr, FUTEX_WAIT_BITSET, expected, timeout, 0, FUTEX_BITSET_MATCH_ANY);
}
// Process-private FUTEX_WAKE on `addr`, passing `nthreads` as the wake count.
// Returns the result of the futex call.
int futex_wake(uint32_t *addr, uint32_t nthreads)
{
    const int iOperation = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
    return futex(addr, iOperation, nthreads, 0, 0, 0);
}
// FUTEX_WAKE on `addr` without FUTEX_PRIVATE_FLAG, passing `nthreads` as the wake count.
// Returns the result of the futex call.
int futex_wake_shared(uint32_t *addr, uint32_t nthreads)
{
    const int iOperation = FUTEX_WAKE;
    return futex(addr, iOperation, nthreads, 0, 0, 0);
}
// Volatile-pointer overload of the untimed private wait: strips the qualifier and
// performs a FUTEX_WAIT | FUTEX_PRIVATE_FLAG call. Returns the result of the futex call.
int futex_wait(volatile uint32_t *addr, uint32_t expected)
{
    auto pWord = (uint32_t *)addr;
    return futex(pWord, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected, 0, 0, 0);
}
// Volatile-pointer overload of the private wait with an optional timeout.
// With a timeout, FUTEX_WAIT_BITSET is used with the match-any bitset; without one,
// plain FUTEX_WAIT is used. Returns the result of the futex call.
int futex_wait(volatile uint32_t *addr, uint32_t expected, const struct timespec *timeout)
{
    auto pWord = (uint32_t *)addr;

    if (!timeout)
    {
        // No deadline: plain private wait (timeout is null here).
        return futex(pWord, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected, timeout, 0, 0);
    }

    return futex(pWord, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG, expected, timeout, 0, FUTEX_BITSET_MATCH_ANY);
}
// Volatile-pointer overload of the private wake: strips the qualifier and performs a
// FUTEX_WAKE | FUTEX_PRIVATE_FLAG call with `nthreads` as the wake count.
int futex_wake(volatile uint32_t *addr, uint32_t nthreads)
{
    auto pWord = (uint32_t *)addr;
    return futex(pWord, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, nthreads, 0, 0, 0);
}
// Issues the io_setup system call; the kernel writes the new context through `ctxp`.
// Returns the wrapper's result narrowed to int.
int io_setup(unsigned nr, aio_context_t *ctxp)
{
    const long lStatus = syscallFuckYou(__NR_io_setup, nr, ctxp);
    return lStatus;
}
// Issues the io_destroy system call for `ctx`.
// Returns the wrapper's result narrowed to int.
int io_destroy(aio_context_t ctx)
{
    const long lStatus = syscallFuckYou(__NR_io_destroy, ctx);
    return lStatus;
}
// Issues the io_submit system call, handing `nr` control-block pointers from `iocbpp`
// to the context `ctx`. Returns the wrapper's result narrowed to int.
int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
    const long lStatus = syscallFuckYou(__NR_io_submit, ctx, nr, iocbpp);
    return lStatus;
}
// Disabled (compiled out): a syscall-only io_getevents that calls syscall() directly.
// The active io_getevents in this file takes an extra bStrictUserspaceOnly parameter and
// tries to drain the completion ring from userspace before falling back to the kernel.
#if 0
int io_getevents(aio_context_t ctx, long min_nr, long max_nr,
struct io_event *events,
struct timespec *timeout)
{
return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout);
}
#endif
// Header overlaid on the memory that an aio_context_t value points at; io_getevents in
// this file casts the context to this type to read completions without a syscall.
// NOTE(review): the field order presumably mirrors the kernel's AIO completion ring
// header - confirm against the kernel sources before changing anything here.
struct aio_ring
{
unsigned id;
// Number of slots in `events`; used as the wrap-around modulus by io_getevents.
unsigned nr;
// Consumer index; advanced by io_getevents with a compare-exchange.
unsigned head;
// Producer index; the ring is treated as empty when head == tail.
unsigned tail;
// Compared against kAioRingMagic before the ring is trusted.
unsigned magic;
unsigned compat_features;
unsigned incompat_features;
unsigned header_length;
// Completion slots that follow the header (zero-length trailing array).
struct io_event events[0];
};
// Collects AIO completions for `ctx`, first from the userspace-visible ring and then,
// if needed, through the io_getevents system call.
//
// ctx                   context value; reinterpreted as a pointer to struct aio_ring.
// min_nr / max_nr       minimum and maximum number of events wanted.
// events                output array; must have room for max_nr entries.
// timeout               passed to the kernel; a non-null all-zero timeout means "poll only".
// bStrictUserspaceOnly  when set and the ring was usable, never enter the kernel.
//
// Returns the number of events stored in `events`, or the kernel's (non-positive) result
// when nothing was harvested from the ring.
int io_getevents(aio_context_t ctx,
long min_nr, long max_nr,
struct io_event *events,
struct timespec *timeout,
bool bStrictUserspaceOnly)
{
// Number of events harvested from the ring so far.
int i {};
auto pRing = (struct aio_ring *)ctx;
// Without a recognisable ring header the fast path is skipped entirely.
// Note that this jump also bypasses the bStrictUserspaceOnly check further down.
if (!pRing ||
pRing->magic != kAioRingMagic)
{
goto do_syscall;
}
while (i < max_nr)
{
auto head = AuAtomicLoad(&pRing->head);
// NOTE(review): tail is read with a plain load - confirm this ordering is sufficient.
if (head == pRing->tail)
{
// Ring is empty.
break;
}
// Copy the slot out first, then try to claim it by advancing head.
events[i++] = pRing->events[head];
auto nextHead = (head + 1) % pRing->nr;
// Presumably returns the value observed before the exchange; a mismatch means the
// claim was lost, so the copy is discarded and the slot index is retried.
if (AuAtomicCompareExchange(&pRing->head,
nextHead,
head) != head)
{
i--;
}
}
// Nothing harvested and the caller asked for a zero-length wait: report no events
// without entering the kernel.
if (!i &&
timeout &&
!timeout->tv_sec &&
!timeout->tv_nsec)
{
return 0;
}
// The ring alone satisfied the request.
if (i &&
i >= min_nr)
{
return i;
}
// Caller forbids the kernel fallback: return whatever the ring yielded (possibly 0).
if (bStrictUserspaceOnly)
{
return i;
}
do_syscall:
// Ask the kernel for the remainder, appending after the events already harvested.
int iKernelCount {};
if ((iKernelCount = syscallFuckYou(__NR_io_getevents,
ctx, min_nr - i,
max_nr - i,
&events[i], timeout)) > 0)
{
return i + iKernelCount;
}
else if (i)
{
// The kernel returned nothing (or failed), but ring events were already collected.
return i;
}
else
{
// Nothing at all was collected: surface the kernel's result.
return iKernelCount;
}
}
// Issues the io_cancel system call for `iocb` on context `ctx_id`; `result` is passed
// through to the kernel. Returns the wrapper's result narrowed to int.
int io_cancel(aio_context_t ctx_id, struct iocb *iocb,
              struct io_event *result)
{
    const long lStatus = syscallFuckYou(__NR_io_cancel, ctx_id, iocb, result);
    return lStatus;
}
// Fills `pBuffer` with up to `uLength` bytes via the getrandom system call, passing 1
// as the flags argument (the value of GRND_NONBLOCK in <linux/random.h>).
//
// Once the kernel has answered -ENOSYS, that fact is cached in a function-local static
// and every later call returns -ENOSYS immediately without a syscall.
// Returns the wrapper's result (byte count, or negated errno under the glibc path).
ssize_t sys_getrandom(void *pBuffer, size_t uLength)
{
    static AuUInt32 gShouldNotGetRand {};

    if (gShouldNotGetRand)
    {
        return -ENOSYS;
    }

    const ssize_t iResult = syscallFuckYou(__NR_getrandom, pBuffer, uLength, 1);
    if (iResult == -ENOSYS)
    {
        // Remember that this kernel lacks the syscall.
        gShouldNotGetRand = 1;
    }

    return iResult;
}
// Issues the close_range system call for descriptors in [first, last] when the running
// kernel is new enough to have it; otherwise returns -ENOSYS without entering the kernel.
//
// The gate is "kernel >= 5.9". The previous check compared the *major* version against 9
// in the second clause, which could never be true when major == 5, so every 5.x kernel
// (including 5.9+) was reported as unsupported.
//
// Returns the wrapper's result narrowed to int, or -ENOSYS on older kernels.
int close_range(unsigned int first, unsigned int last,
                unsigned int flags)
{
    auto &platform = AuSwInfo::GetPlatformInfo();

    const bool bKernelHasCloseRange =
        (platform.uKernelMajor > 5) ||
        (platform.uKernelMajor == 5 && platform.uKernelMinor >= 9);

    if (!bKernelHasCloseRange)
    {
        return -ENOSYS;
    }

    return syscallFuckYou(__NR_close_range, first, last, flags);
}
// Reports whether this platform backend provides a native futex-based wait-on-address.
// This implementation always answers yes.
bool SysNativeWaitOnAddressFutexSupported()
{
    const bool bSupported = true;
    return bSupported;
}
// Blocks on the 32-bit futex word at pTargetAddress for as long as it still holds the
// value read from pCompareAddress, with no timeout.
//
// pTargetAddress   address being waited on.
// pCompareAddress  address of the value the target is expected to hold.
// uWordSize        size in bytes of the full word compared after waking.
//
// Returns true when, after the wait ends, the target differs from the comparison value
// over the full uWordSize bytes; false if it still matches (or on -ETIMEDOUT).
//
// Fix: on big-endian builds with 8-byte words the futex must watch the low 32 bits,
// which live at offset +4. The previous code advanced the caller's pointers themselves
// and then reused them for the final uWordSize-byte compare, reading 4 bytes past both
// objects and skipping the high half. The offset pointers are now kept separate.
bool SysWaitOnAddressNoTimed(const void *pTargetAddress,
                             const void *pCompareAddress,
                             AuUInt8 uWordSize)
{
    int ret {};

    // Addresses handed to the futex call; these may be offset, the originals never are.
    const void *pFutexTarget = pTargetAddress;
    const void *pFutexCompare = pCompareAddress;

#if defined(AU_CPU_ENDIAN_BIG)
    if (uWordSize == 8)
    {
        pFutexTarget = AuReinterpretCast<const char *>(pTargetAddress) + 4;
        pFutexCompare = AuReinterpretCast<const char *>(pCompareAddress) + 4;
    }
#endif

    auto uCurrent = *(AuUInt32 *)pFutexCompare;

    do
    {
        ret = futex_wait((AuUInt32 *)pFutexTarget, uCurrent, nullptr);

        if (ret == -ETIMEDOUT)
        {
            return false;
        }

        // Only an interrupted wait is retried; a wake (0), a value mismatch (-EAGAIN)
        // or any other result falls through to the final comparison.
    }
    while (ret == -EINTR);

    return AuMemcmp(pCompareAddress, pTargetAddress, uWordSize) != 0;
}
// Blocks on the 32-bit futex word at pTargetAddress for as long as it still holds the
// value read from pCompareAddress, giving up at a deadline.
//
// pTargetAddress        address being waited on.
// pCompareAddress       address of the value the target is expected to hold.
// uWordSize             size in bytes of the full word compared after waking.
// uAbsTimeSteadyClock   deadline used when uAbsTimeAltClock is empty.
// uRelativeNanoseconds  not used by this implementation.
// uAbsTimeAltClock      optional deadline that takes precedence when present.
// bSpun                 not used by this implementation.
//
// Returns false when the futex call reports -ETIMEDOUT; otherwise true when the target
// differs from the comparison value over the full uWordSize bytes.
//
// Fix: on big-endian builds with 8-byte words the futex must watch the low 32 bits,
// which live at offset +4. The previous code advanced the caller's pointers themselves
// and then reused them for the final uWordSize-byte compare, reading 4 bytes past both
// objects and skipping the high half. The offset pointers are now kept separate.
bool SysWaitOnAddressTimed(const void *pTargetAddress,
                           const void *pCompareAddress,
                           AuUInt8 uWordSize,
                           AuUInt64 uAbsTimeSteadyClock,
                           AuUInt64 uRelativeNanoseconds,
                           AuOptional<AuUInt64> uAbsTimeAltClock,
                           bool bSpun)
{
    int ret {};

    // Addresses handed to the futex call; these may be offset, the originals never are.
    const void *pFutexTarget = pTargetAddress;
    const void *pFutexCompare = pCompareAddress;

#if defined(AU_CPU_ENDIAN_BIG)
    if (uWordSize == 8)
    {
        pFutexTarget = AuReinterpretCast<const char *>(pTargetAddress) + 4;
        pFutexCompare = AuReinterpretCast<const char *>(pCompareAddress) + 4;
    }
#endif

    auto uCurrent = *(AuUInt32 *)pFutexCompare;

    struct timespec tspec;
    Time::monoabsns2ts(&tspec, uAbsTimeAltClock ? uAbsTimeAltClock.value() : uAbsTimeSteadyClock);

    do
    {
        ret = futex_wait((AuUInt32 *)pFutexTarget, uCurrent, &tspec);

        if (ret == -ETIMEDOUT)
        {
            return false;
        }

        // Only an interrupted wait is retried; a wake (0), a value mismatch (-EAGAIN)
        // or any other result falls through to the final comparison.
    }
    while (ret == -EINTR);

    return AuMemcmp(pCompareAddress, pTargetAddress, uWordSize) != 0;
}
// Issues a private futex wake on the word at pAddress with dwCount as the wake count.
// The result of the wake call is ignored.
void SysWakeNOnAddress(const void *pAddress,
                       AuUInt32 dwCount)
{
    auto pWord = (AuUInt32 *)pAddress;
    futex_wake(pWord, dwCount);
}
// Issues a private futex wake on the word at pAddress with INT_MAX as the wake count.
// The result of the wake call is ignored.
void SysWakeAllOnAddress(const void *pAddress)
{
    auto pWord = (AuUInt32 *)pAddress;
    futex_wake(pWord, INT_MAX);
}
// Issues a private futex wake on the word at pAddress with a wake count of one.
// The result of the wake call is ignored.
void SysWakeOneOnAddress(const void *pAddress)
{
    auto pWord = (AuUInt32 *)pAddress;
    futex_wake(pWord, 1);
}
// TODO: transition the externally linked POSIX-like symbols down to internally linked symbols.
// GNU versioning is terrible and halfassed.
// arbitrary symbols of posix and the c[rt] spuriously change their required glibc version on a dime.
// this thereby requires the users' platform to update a write-protected distro binary (/???/ld-???.so) of a min-ver linker to be paired with at least min-ver of glibc as specified by a "portable" elf section.
// not only must glibc extensions be dynamically loaded, we will probably end up implementing a bit of posix over a generic crt-provided syscall func, what with this GNU stupidity still plaguing everything Linux.
// build chains contaminated with glibc are hopeless without our portable glibc post-build scripts. also worthy of note, musl doesn't implement some tls bits and some dumb string function nvidia requires.
// no matter what, we need to expect glibc, aim for musl, and know that vendor tie in means we're almost (not really) forced into using glibc in prod *
// ( *: prebuilt arm gpu binaries, prebuilt nvidia drivers, cisco signed openh264 with prepaid royalties, etc. we really shouldn't need binary patches or supplemental symbols to link these. )
// ps: just to prove how fucking asinine glibc is:
// pthread_setspecific@GLIBC_2.34 pthread_attr_setstacksize@GLIBC_2.34 (2.38 = 2024/Q1, 2.34 = 2021/Q3, 2.33 = 2021/Q1, 2.32 = 2020/Q3)
// pthread_sigmask@GLIBC_2.32 __isoc23_strtoull@GLIBC_2.38
// __isoc23_sscanf@GLIBC_2.38 dlopen@GLIBC_2.34
// dladdr@GLIBC_2.34 pthread_cancel@GLIBC_2.34
// __isoc23_strtoll@GLIBC_2.38 pthread_cond_timedwait@GLIBC_2.3.2
// __isoc23_wcstoul@GLIBC_2.38 pthread_getspecific@GLIBC_2.34
// pthread_join@GLIBC_2.34 pthread_key_create@GLIBC_2.34
// stat@GLIBC_2.33 fstat@GLIBC_2.33
// __isoc23_strtoll_l@GLIBC_2.38 shm_open@GLIBC_2.34
// gettid@GLIBC_2.30 __isoc23_wcstoll@GLIBC_2.38
// pthread_once@GLIBC_2.34 pthread_setaffinity_np@GLIBC_2.34
// dlsym@GLIBC_2.34 pthread_create@GLIBC_2.34
// pthread_setname_np@GLIBC_2.34 __isoc23_vsscanf@GLIBC_2.38
// __isoc23_strtoull_l@GLIBC_2.38 pthread_cond_destroy@GLIBC_2.3.2
// __isoc23_wcstol@GLIBC_2.38 __isoc23_wcstoull@GLIBC_2.38
// __isoc23_strtoul@GLIBC_2.38 lstat@GLIBC_2.33
// pthread_kill@GLIBC_2.34 __isoc23_strtol@GLIBC_2.38
// ( 2024/Q4 AuroraRuntime.Stage.Linux.x86_64.so )
// ( built under a glibc root )
// ( 34 bad imports out of 306 )
//
// wanna dlopen? wanna stat a file? call a pthread function of args: {pthread handle, const char *}? you know what? do you even know your thread id? ...i mean, wanna get your current posix process id?
// sorry, that'll be a 2022 glibc binary, a forced distro update, bash scripts to run an ELF binary of CWD != bindir, and a fuck you + pocket sand in the eyes + dagger in the ass for choosing GHANNUUU / FREEDUM
}