glibc/sysdeps/unix/sysv/linux/clone3.h

68 lines
2.3 KiB
C
Raw Normal View History

Add an internal wrapper for clone, clone2 and clone3 The clone3 system call (since Linux 5.3) provides a superset of the functionality of clone and clone2. It also provides a number of API improvements, including the ability to specify the size of the child's stack area which can be used by kernel to compute the shadow stack size when allocating the shadow stack. Add: extern int __clone_internal (struct clone_args *__cl_args, int (*__func) (void *__arg), void *__arg); to provide an abstract interface for clone, clone2 and clone3. 1. Simplify stack management for thread creation by passing both stack base and size to create_thread. 2. Consolidate clone vs clone2 differences into a single file. 3. Call __clone3 if HAVE_CLONE3_WAPPER is defined. If __clone3 returns -1 with ENOSYS, fall back to clone or clone2. 4. Use only __clone_internal to clone a thread. Since the stack size argument for create_thread is now unconditional, always pass stack size to create_thread. 5. Enable the public clone3 wrapper in the future after it has been added to all targets. NB: Sandbox will return ENOSYS on clone3 in both Chromium: The following revision refers to this bug: https://chromium.googlesource.com/chromium/src/+/218438259dd795456f0a48f67cbe5b4e520db88b commit 218438259dd795456f0a48f67cbe5b4e520db88b Author: Matthew Denton <mpdenton@chromium.org> Date: Thu Jun 03 20:06:13 2021 Linux sandbox: return ENOSYS for clone3 Because clone3 uses a pointer argument rather than a flags argument, we cannot examine the contents with seccomp, which is essential to preventing sandboxed processes from starting other processes. So, we won't be able to support clone3 in Chromium. This CL modifies the BPF policy to return ENOSYS for clone3 so glibc always uses the fallback to clone. Bug: 1213452 Change-Id: I7c7c585a319e0264eac5b1ebee1a45be2d782303 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2936184 Reviewed-by: Robert Sesek <rsesek@chromium.org> Commit-Queue: Matthew Denton <mpdenton@chromium.org> Cr-Commit-Position: refs/heads/master@{#888980} [modify] https://crrev.com/218438259dd795456f0a48f67cbe5b4e520db88b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc and Firefox: https://hg.mozilla.org/integration/autoland/rev/ecb4011a0c76 Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
2021-02-13 19:47:46 +00:00
/* The wrapper of clone3.
Copyright (C) 2021-2023 Free Software Foundation, Inc.
Add an internal wrapper for clone, clone2 and clone3 The clone3 system call (since Linux 5.3) provides a superset of the functionality of clone and clone2. It also provides a number of API improvements, including the ability to specify the size of the child's stack area which can be used by kernel to compute the shadow stack size when allocating the shadow stack. Add: extern int __clone_internal (struct clone_args *__cl_args, int (*__func) (void *__arg), void *__arg); to provide an abstract interface for clone, clone2 and clone3. 1. Simplify stack management for thread creation by passing both stack base and size to create_thread. 2. Consolidate clone vs clone2 differences into a single file. 3. Call __clone3 if HAVE_CLONE3_WAPPER is defined. If __clone3 returns -1 with ENOSYS, fall back to clone or clone2. 4. Use only __clone_internal to clone a thread. Since the stack size argument for create_thread is now unconditional, always pass stack size to create_thread. 5. Enable the public clone3 wrapper in the future after it has been added to all targets. NB: Sandbox will return ENOSYS on clone3 in both Chromium: The following revision refers to this bug: https://chromium.googlesource.com/chromium/src/+/218438259dd795456f0a48f67cbe5b4e520db88b commit 218438259dd795456f0a48f67cbe5b4e520db88b Author: Matthew Denton <mpdenton@chromium.org> Date: Thu Jun 03 20:06:13 2021 Linux sandbox: return ENOSYS for clone3 Because clone3 uses a pointer argument rather than a flags argument, we cannot examine the contents with seccomp, which is essential to preventing sandboxed processes from starting other processes. So, we won't be able to support clone3 in Chromium. This CL modifies the BPF policy to return ENOSYS for clone3 so glibc always uses the fallback to clone. Bug: 1213452 Change-Id: I7c7c585a319e0264eac5b1ebee1a45be2d782303 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2936184 Reviewed-by: Robert Sesek <rsesek@chromium.org> Commit-Queue: Matthew Denton <mpdenton@chromium.org> Cr-Commit-Position: refs/heads/master@{#888980} [modify] https://crrev.com/218438259dd795456f0a48f67cbe5b4e520db88b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc and Firefox: https://hg.mozilla.org/integration/autoland/rev/ecb4011a0c76 Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
2021-02-13 19:47:46 +00:00
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#ifndef _CLONE3_H
#define _CLONE3_H 1
#include <features.h>
#include <stddef.h>
#include <bits/types.h>
__BEGIN_DECLS
/* The unsigned 64-bit and 8-byte aligned integer type. */
typedef __U64_TYPE __aligned_uint64_t __attribute__ ((__aligned__ (8)));
/* This struct should only be used in an argument to the clone3 system
call (along with its size argument). It may be extended with new
fields in the future. */
struct clone_args
{
/* Flags bit mask. */
__aligned_uint64_t flags;
/* Where to store PID file descriptor (pid_t *). */
__aligned_uint64_t pidfd;
/* Where to store child TID, in child's memory (pid_t *). */
__aligned_uint64_t child_tid;
/* Where to store child TID, in parent's memory (int *). */
__aligned_uint64_t parent_tid;
/* Signal to deliver to parent on child termination */
__aligned_uint64_t exit_signal;
/* The lowest address of stack. */
__aligned_uint64_t stack;
/* Size of stack. */
__aligned_uint64_t stack_size;
/* Location of new TLS. */
__aligned_uint64_t tls;
/* Pointer to a pid_t array (since Linux 5.5). */
__aligned_uint64_t set_tid;
/* Number of elements in set_tid (since Linux 5.5). */
__aligned_uint64_t set_tid_size;
/* File descriptor for target cgroup of child (since Linux 5.7). */
__aligned_uint64_t cgroup;
};
/* The wrapper of clone3. */
extern int clone3 (struct clone_args *__cl_args, size_t __size,
int (*__func) (void *__arg), void *__arg);
__END_DECLS
#endif /* clone3.h */