Rollup merge of #126827 - the8472:pidfd-spawn, r=workingjubilee
Use pidfd_spawn for faster process spawning when a PidFd is requested. glibc 2.39 added `pidfd_spawnp` and `pidfd_getpid`, which make it possible to get pidfds while staying on the CLONE_VFORK path. Verified with strace that vfork gets used: ``` $ strace -ff -e pidfd_open,clone3,openat,execve,waitid,close ./x test std --no-doc -- pidfd [...] [pid 2820532] clone3({flags=CLONE_VM|CLONE_PIDFD|CLONE_VFORK|CLONE_CLEAR_SIGHAND, pidfd=0x7b7f885fec6c, exit_signal=SIGCHLD, stack=0x7b7f88aff000, stack_size=0x9000}strace: Process 2820533 attached <unfinished ...> [pid 2820533] execve("/home/the8472/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */) = -1 ENOENT (No such file or directory) [pid 2820533] execve("/home/the8472/.cargo/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */) = -1 ENOENT (No such file or directory) [pid 2820533] execve("/usr/local/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */) = -1 ENOENT (No such file or directory) [pid 2820533] execve("/usr/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */ <unfinished ...> [pid 2820532] <... clone3 resumed> => {pidfd=[3]}, 88) = 2820533 [pid 2820533] <... execve resumed>) = 0 [pid 2820532] openat(AT_FDCWD, "/proc/self/fdinfo/3", O_RDONLY|O_CLOEXEC) = 4 [pid 2820532] close(4) = 0 ``` Tracking issue: #82971
This commit is contained in:
commit
f9b3e8b387
@ -1,7 +1,7 @@
|
||||
use crate::assert_matches::assert_matches;
|
||||
use crate::os::fd::{AsRawFd, RawFd};
|
||||
use crate::os::linux::process::{ChildExt, CommandExt};
|
||||
use crate::os::unix::process::ExitStatusExt;
|
||||
use crate::os::linux::process::{ChildExt, CommandExt as _};
|
||||
use crate::os::unix::process::{CommandExt as _, ExitStatusExt};
|
||||
use crate::process::Command;
|
||||
|
||||
#[test]
|
||||
@ -21,6 +21,7 @@ fn test_command_pidfd() {
|
||||
let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap();
|
||||
assert!(flags & libc::FD_CLOEXEC != 0);
|
||||
}
|
||||
assert!(child.id() > 0 && child.id() < -1i32 as u32);
|
||||
let status = child.wait().expect("error waiting on pidfd");
|
||||
assert_eq!(status.code(), Some(1));
|
||||
|
||||
@ -42,6 +43,17 @@ fn test_command_pidfd() {
|
||||
.unwrap()
|
||||
.pidfd()
|
||||
.expect_err("pidfd should not have been created");
|
||||
|
||||
// exercise the fork/exec path since the earlier attempts may have used pidfd_spawnp()
|
||||
let mut child =
|
||||
unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap();
|
||||
|
||||
assert!(child.id() > 0 && child.id() < -1i32 as u32);
|
||||
|
||||
if pidfd_open_available {
|
||||
assert!(child.pidfd().is_ok())
|
||||
}
|
||||
child.wait().expect("error waiting on child");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -305,10 +305,13 @@ fn is_minus_one(&self) -> bool {
|
||||
|
||||
impl_is_minus_one! { i8 i16 i32 i64 isize }
|
||||
|
||||
/// Convert native return values to Result using the *-1 means error is in `errno`* convention.
|
||||
/// Non-error values are `Ok`-wrapped.
///
/// When `t.is_minus_one()` is true the returned `Err` is built with
/// `crate::io::Error::last_os_error()`; any other value of `t` is returned unchanged inside `Ok`.
|
||||
pub fn cvt<T: IsMinusOne>(t: T) -> crate::io::Result<T> {
|
||||
if t.is_minus_one() { Err(crate::io::Error::last_os_error()) } else { Ok(t) }
|
||||
}
|
||||
|
||||
/// `-1` → look at `errno` → retry on `EINTR`. Otherwise `Ok()`-wrap the closure return value.
|
||||
pub fn cvt_r<T, F>(mut f: F) -> crate::io::Result<T>
|
||||
where
|
||||
T: IsMinusOne,
|
||||
@ -323,6 +326,7 @@ pub fn cvt_r<T, F>(mut f: F) -> crate::io::Result<T>
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // Not used on all platforms.
|
||||
/// Zero means `Ok()`, all other values are treated as raw OS errors. Does not look at `errno`.
///
/// A non-zero `error` is passed verbatim to `crate::io::Error::from_raw_os_error`, which fits
/// functions that hand back the error number as their return value (in this change the result of
/// `pidfd_spawnp` is converted this way).
|
||||
pub fn cvt_nz(error: libc::c_int) -> crate::io::Result<()> {
|
||||
if error == 0 { Ok(()) } else { Err(crate::io::Error::from_raw_os_error(error)) }
|
||||
}
|
||||
|
@ -449,17 +449,82 @@ fn posix_spawn(
|
||||
use crate::mem::MaybeUninit;
|
||||
use crate::sys::weak::weak;
|
||||
use crate::sys::{self, cvt_nz, on_broken_pipe_flag_used};
|
||||
#[cfg(target_os = "linux")]
|
||||
use core::sync::atomic::{AtomicU8, Ordering};
|
||||
|
||||
if self.get_gid().is_some()
|
||||
|| self.get_uid().is_some()
|
||||
|| (self.env_saw_path() && !self.program_is_path())
|
||||
|| !self.get_closures().is_empty()
|
||||
|| self.get_groups().is_some()
|
||||
|| self.get_create_pidfd()
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "linux")] {
|
||||
weak! {
|
||||
fn pidfd_spawnp(
|
||||
*mut libc::c_int,
|
||||
*const libc::c_char,
|
||||
*const libc::posix_spawn_file_actions_t,
|
||||
*const libc::posix_spawnattr_t,
|
||||
*const *mut libc::c_char,
|
||||
*const *mut libc::c_char
|
||||
) -> libc::c_int
|
||||
}
|
||||
|
||||
weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int }
|
||||
|
||||
static PIDFD_SUPPORTED: AtomicU8 = AtomicU8::new(0);
|
||||
const UNKNOWN: u8 = 0;
|
||||
const SPAWN: u8 = 1;
|
||||
// Obtaining a pidfd via the fork+exec path might work
|
||||
const FORK_EXEC: u8 = 2;
|
||||
// Neither pidfd_spawn nor fork/exec will get us a pidfd.
|
||||
// Instead we'll just posix_spawn if the other preconditions are met.
|
||||
const NO: u8 = 3;
|
||||
|
||||
if self.get_create_pidfd() {
|
||||
let mut support = PIDFD_SUPPORTED.load(Ordering::Relaxed);
|
||||
if support == FORK_EXEC {
|
||||
return Ok(None);
|
||||
}
|
||||
if support == UNKNOWN {
|
||||
support = NO;
|
||||
let our_pid = crate::process::id();
|
||||
let pidfd = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as c_int);
|
||||
match pidfd {
|
||||
Ok(pidfd) => {
|
||||
support = FORK_EXEC;
|
||||
if let Some(Ok(pid)) = pidfd_getpid.get().map(|f| cvt(unsafe { f(pidfd) } as i32)) {
|
||||
if pidfd_spawnp.get().is_some() && pid as u32 == our_pid {
|
||||
support = SPAWN
|
||||
}
|
||||
}
|
||||
unsafe { libc::close(pidfd) };
|
||||
}
|
||||
Err(e) if e.raw_os_error() == Some(libc::EMFILE) => {
|
||||
// We're temporarily(?) out of file descriptors. In this case obtaining a pidfd would also fail
|
||||
// Don't update the support flag so we can probe again later.
|
||||
return Err(e)
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
PIDFD_SUPPORTED.store(support, Ordering::Relaxed);
|
||||
if support == FORK_EXEC {
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
core::assert_matches::debug_assert_matches!(support, SPAWN | NO);
|
||||
}
|
||||
} else {
|
||||
if self.get_create_pidfd() {
|
||||
unreachable!("only implemented on linux")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only glibc 2.24+ posix_spawn() supports returning ENOENT directly.
|
||||
#[cfg(all(target_os = "linux", target_env = "gnu"))]
|
||||
{
|
||||
@ -543,9 +608,6 @@ fn posix_spawn_file_actions_addchdir_np(
|
||||
|
||||
let pgroup = self.get_pgroup();
|
||||
|
||||
// Safety: -1 indicates we don't have a pidfd.
|
||||
let mut p = unsafe { Process::new(0, -1) };
|
||||
|
||||
struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit<libc::posix_spawn_file_actions_t>);
|
||||
|
||||
impl Drop for PosixSpawnFileActions<'_> {
|
||||
@ -640,6 +702,47 @@ fn drop(&mut self) {
|
||||
#[cfg(target_os = "nto")]
|
||||
let spawn_fn = retrying_libc_posix_spawnp;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
if self.get_create_pidfd() && PIDFD_SUPPORTED.load(Ordering::Relaxed) == SPAWN {
|
||||
let mut pidfd: libc::c_int = -1;
|
||||
let spawn_res = pidfd_spawnp.get().unwrap()(
|
||||
&mut pidfd,
|
||||
self.get_program_cstr().as_ptr(),
|
||||
file_actions.0.as_ptr(),
|
||||
attrs.0.as_ptr(),
|
||||
self.get_argv().as_ptr() as *const _,
|
||||
envp as *const _,
|
||||
);
|
||||
|
||||
let spawn_res = cvt_nz(spawn_res);
|
||||
if let Err(ref e) = spawn_res
|
||||
&& e.raw_os_error() == Some(libc::ENOSYS)
|
||||
{
|
||||
PIDFD_SUPPORTED.store(FORK_EXEC, Ordering::Relaxed);
|
||||
return Ok(None);
|
||||
}
|
||||
spawn_res?;
|
||||
|
||||
let pid = match cvt(pidfd_getpid.get().unwrap()(pidfd)) {
|
||||
Ok(pid) => pid,
|
||||
Err(e) => {
|
||||
// The child has been spawned and we are holding its pidfd.
|
||||
// But we cannot obtain its pid even though pidfd_getpid support was verified earlier.
|
||||
// This might happen if libc can't open procfs because the file descriptor limit has been reached.
|
||||
libc::close(pidfd);
|
||||
return Err(Error::new(
|
||||
e.kind(),
|
||||
"pidfd_spawnp succeeded but the child's PID could not be obtained",
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
return Ok(Some(Process::new(pid, pidfd)));
|
||||
}
|
||||
|
||||
// Safety: -1 indicates we don't have a pidfd.
|
||||
let mut p = Process::new(0, -1);
|
||||
|
||||
let spawn_res = spawn_fn(
|
||||
&mut p.pid,
|
||||
self.get_program_cstr().as_ptr(),
|
||||
@ -786,6 +889,12 @@ pub struct Process {
|
||||
|
||||
impl Process {
|
||||
#[cfg(target_os = "linux")]
|
||||
/// # Safety
|
||||
///
|
||||
/// `pidfd` must either be -1 (representing no file descriptor) or a valid, exclusively owned file
|
||||
/// descriptor (See [I/O Safety]).
|
||||
///
|
||||
/// [I/O Safety]: crate::io#io-safety
|
||||
unsafe fn new(pid: pid_t, pidfd: pid_t) -> Self {
|
||||
use crate::os::unix::io::FromRawFd;
|
||||
use crate::sys_common::FromInner;
|
||||
|
Loading…
Reference in New Issue
Block a user