Rollup merge of #126827 - the8472:pidfd-spawn, r=workingjubilee

Use pidfd_spawn for faster process spawning when a PidFd is requested

glibc 2.39 added `pidfd_spawnp` and `pidfd_getpid` which makes it possible to get pidfds while staying on the CLONE_VFORK path.

verified that vfork gets used with strace:

```
$  strace -ff -e pidfd_open,clone3,openat,execve,waitid,close ./x test std --no-doc -- pidfd
[...]
[pid 2820532] clone3({flags=CLONE_VM|CLONE_PIDFD|CLONE_VFORK|CLONE_CLEAR_SIGHAND, pidfd=0x7b7f885fec6c, exit_signal=SIGCHLD, stack=0x7b7f88aff000, stack_size=0x9000}strace: Process 2820533 attached
 <unfinished ...>
[pid 2820533] execve("/home/the8472/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */) = -1 ENOENT (No such file or directory)
[pid 2820533] execve("/home/the8472/.cargo/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */) = -1 ENOENT (No such file or directory)
[pid 2820533] execve("/usr/local/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */) = -1 ENOENT (No such file or directory)
[pid 2820533] execve("/usr/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */ <unfinished ...>
[pid 2820532] <... clone3 resumed> => {pidfd=[3]}, 88) = 2820533
[pid 2820533] <... execve resumed>)     = 0
[pid 2820532] openat(AT_FDCWD, "/proc/self/fdinfo/3", O_RDONLY|O_CLOEXEC) = 4
[pid 2820532] close(4)                  = 0
```

Tracking issue: #82971
This commit is contained in:
Matthias Krüger 2024-07-12 14:37:58 +02:00 committed by GitHub
commit f9b3e8b387
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 131 additions and 6 deletions

View File

@ -1,7 +1,7 @@
use crate::assert_matches::assert_matches;
use crate::os::fd::{AsRawFd, RawFd};
use crate::os::linux::process::{ChildExt, CommandExt};
use crate::os::unix::process::ExitStatusExt;
use crate::os::linux::process::{ChildExt, CommandExt as _};
use crate::os::unix::process::{CommandExt as _, ExitStatusExt};
use crate::process::Command;
#[test]
@ -21,6 +21,7 @@ fn test_command_pidfd() {
let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap();
assert!(flags & libc::FD_CLOEXEC != 0);
}
assert!(child.id() > 0 && child.id() < -1i32 as u32);
let status = child.wait().expect("error waiting on pidfd");
assert_eq!(status.code(), Some(1));
@ -42,6 +43,17 @@ fn test_command_pidfd() {
.unwrap()
.pidfd()
.expect_err("pidfd should not have been created");
// exercise the fork/exec path since the earlier attempts may have used pidfd_spawnp()
let mut child =
unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap();
assert!(child.id() > 0 && child.id() < -1i32 as u32);
if pidfd_open_available {
assert!(child.pidfd().is_ok())
}
child.wait().expect("error waiting on child");
}
#[test]

View File

@ -305,10 +305,13 @@ fn is_minus_one(&self) -> bool {
impl_is_minus_one! { i8 i16 i32 i64 isize }
/// Convert native return values to Result using the *-1 means error is in `errno`* convention.
/// Non-error values are `Ok`-wrapped.
pub fn cvt<T: IsMinusOne>(t: T) -> crate::io::Result<T> {
if t.is_minus_one() { Err(crate::io::Error::last_os_error()) } else { Ok(t) }
}
/// `-1` → look at `errno` → retry on `EINTR`. Otherwise `Ok()`-wrap the closure return value.
pub fn cvt_r<T, F>(mut f: F) -> crate::io::Result<T>
where
T: IsMinusOne,
@ -323,6 +326,7 @@ pub fn cvt_r<T, F>(mut f: F) -> crate::io::Result<T>
}
#[allow(dead_code)] // Not used on all platforms.
/// Zero means `Ok()`, all other values are treated as raw OS errors. Does not look at `errno`.
pub fn cvt_nz(error: libc::c_int) -> crate::io::Result<()> {
if error == 0 { Ok(()) } else { Err(crate::io::Error::from_raw_os_error(error)) }
}

View File

@ -449,17 +449,82 @@ fn posix_spawn(
use crate::mem::MaybeUninit;
use crate::sys::weak::weak;
use crate::sys::{self, cvt_nz, on_broken_pipe_flag_used};
#[cfg(target_os = "linux")]
use core::sync::atomic::{AtomicU8, Ordering};
if self.get_gid().is_some()
|| self.get_uid().is_some()
|| (self.env_saw_path() && !self.program_is_path())
|| !self.get_closures().is_empty()
|| self.get_groups().is_some()
|| self.get_create_pidfd()
{
return Ok(None);
}
cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {
weak! {
fn pidfd_spawnp(
*mut libc::c_int,
*const libc::c_char,
*const libc::posix_spawn_file_actions_t,
*const libc::posix_spawnattr_t,
*const *mut libc::c_char,
*const *mut libc::c_char
) -> libc::c_int
}
weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int }
static PIDFD_SUPPORTED: AtomicU8 = AtomicU8::new(0);
const UNKNOWN: u8 = 0;
const SPAWN: u8 = 1;
// Obtaining a pidfd via the fork+exec path might work
const FORK_EXEC: u8 = 2;
// Neither pidfd_spawn nor fork/exec will get us a pidfd.
// Instead we'll just posix_spawn if the other preconditions are met.
const NO: u8 = 3;
if self.get_create_pidfd() {
let mut support = PIDFD_SUPPORTED.load(Ordering::Relaxed);
if support == FORK_EXEC {
return Ok(None);
}
if support == UNKNOWN {
support = NO;
let our_pid = crate::process::id();
let pidfd = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as c_int);
match pidfd {
Ok(pidfd) => {
support = FORK_EXEC;
if let Some(Ok(pid)) = pidfd_getpid.get().map(|f| cvt(unsafe { f(pidfd) } as i32)) {
if pidfd_spawnp.get().is_some() && pid as u32 == our_pid {
support = SPAWN
}
}
unsafe { libc::close(pidfd) };
}
Err(e) if e.raw_os_error() == Some(libc::EMFILE) => {
// We're temporarily(?) out of file descriptors. In this case obtaining a pidfd would also fail
// Don't update the support flag so we can probe again later.
return Err(e)
}
_ => {}
}
PIDFD_SUPPORTED.store(support, Ordering::Relaxed);
if support == FORK_EXEC {
return Ok(None);
}
}
core::assert_matches::debug_assert_matches!(support, SPAWN | NO);
}
} else {
if self.get_create_pidfd() {
unreachable!("only implemented on linux")
}
}
}
// Only glibc 2.24+ posix_spawn() supports returning ENOENT directly.
#[cfg(all(target_os = "linux", target_env = "gnu"))]
{
@ -543,9 +608,6 @@ fn posix_spawn_file_actions_addchdir_np(
let pgroup = self.get_pgroup();
// Safety: -1 indicates we don't have a pidfd.
let mut p = unsafe { Process::new(0, -1) };
struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit<libc::posix_spawn_file_actions_t>);
impl Drop for PosixSpawnFileActions<'_> {
@ -640,6 +702,47 @@ fn drop(&mut self) {
#[cfg(target_os = "nto")]
let spawn_fn = retrying_libc_posix_spawnp;
#[cfg(target_os = "linux")]
if self.get_create_pidfd() && PIDFD_SUPPORTED.load(Ordering::Relaxed) == SPAWN {
let mut pidfd: libc::c_int = -1;
let spawn_res = pidfd_spawnp.get().unwrap()(
&mut pidfd,
self.get_program_cstr().as_ptr(),
file_actions.0.as_ptr(),
attrs.0.as_ptr(),
self.get_argv().as_ptr() as *const _,
envp as *const _,
);
let spawn_res = cvt_nz(spawn_res);
if let Err(ref e) = spawn_res
&& e.raw_os_error() == Some(libc::ENOSYS)
{
PIDFD_SUPPORTED.store(FORK_EXEC, Ordering::Relaxed);
return Ok(None);
}
spawn_res?;
let pid = match cvt(pidfd_getpid.get().unwrap()(pidfd)) {
Ok(pid) => pid,
Err(e) => {
// The child has been spawned and we are holding its pidfd.
// But we cannot obtain its pid even though pidfd_getpid support was verified earlier.
// This might happen if libc can't open procfs because the file descriptor limit has been reached.
libc::close(pidfd);
return Err(Error::new(
e.kind(),
"pidfd_spawnp succeeded but the child's PID could not be obtained",
));
}
};
return Ok(Some(Process::new(pid, pidfd)));
}
// Safety: -1 indicates we don't have a pidfd.
let mut p = Process::new(0, -1);
let spawn_res = spawn_fn(
&mut p.pid,
self.get_program_cstr().as_ptr(),
@ -786,6 +889,12 @@ pub struct Process {
impl Process {
#[cfg(target_os = "linux")]
/// # Safety
///
/// `pidfd` must either be -1 (representing no file descriptor) or a valid, exclusively owned file
/// descriptor (See [I/O Safety]).
///
/// [I/O Safety]: crate::io#io-safety
unsafe fn new(pid: pid_t, pidfd: pid_t) -> Self {
use crate::os::unix::io::FromRawFd;
use crate::sys_common::FromInner;