diff --git a/library/std/src/os/linux/mod.rs b/library/std/src/os/linux/mod.rs index 94438defc22..8e7776f6646 100644 --- a/library/std/src/os/linux/mod.rs +++ b/library/std/src/os/linux/mod.rs @@ -4,4 +4,5 @@ #![doc(cfg(target_os = "linux"))] pub mod fs; +pub mod process; pub mod raw; diff --git a/library/std/src/os/linux/process.rs b/library/std/src/os/linux/process.rs new file mode 100644 index 00000000000..661d3cef7a0 --- /dev/null +++ b/library/std/src/os/linux/process.rs @@ -0,0 +1,47 @@ +//! Linux-specific extensions to primitives in the `std::process` module. + +#![unstable(feature = "linux_pidfd", issue = "none")] + +use crate::process; +use crate::sys_common::AsInnerMut; +use crate::io::Result; + +/// Os-specific extensions to [`process::Child`] +/// +/// [`process::Child`]: crate::process::Child +pub trait ChildExt { + /// Obtains the pidfd created for this child process, if available. + /// + /// A pidfd will only ever be available if `create_pidfd(true)` was called + /// when the corresponding `Command` was created. + /// + /// Even if `create_pidfd(true)` is called, a pidfd may not be available + /// due to an older version of Linux being in use, or if + /// some other error occurred. + /// + /// See `man pidfd_open` for more details about pidfds. + fn pidfd(&self) -> Result; +} + +/// Os-specific extensions to [`process::Command`] +/// +/// [`process::Command`]: crate::process::Command +pub trait CommandExt { + /// Sets whether or not this `Command` will attempt to create a pidfd + /// for the child. If this method is never called, a pidfd will + /// not be created. + /// + /// The pidfd can be retrieved from the child via [`ChildExt::pidfd`]. + /// + /// A pidfd will only be created if it is possible to do so + /// in a guaranteed race-free manner (e.g. if the `clone3` system call is + /// supported). Otherwise, [`ChildExt::pidfd`] will return an error. 
+ fn create_pidfd(&mut self, val: bool) -> &mut process::Command; +} + +impl CommandExt for process::Command { + fn create_pidfd(&mut self, val: bool) -> &mut process::Command { + self.as_inner_mut().create_pidfd(val); + self + } +} diff --git a/library/std/src/process.rs b/library/std/src/process.rs index 11a0432ce27..99c3369425b 100644 --- a/library/std/src/process.rs +++ b/library/std/src/process.rs @@ -166,7 +166,7 @@ use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner}; /// [`wait`]: Child::wait #[stable(feature = "process", since = "1.0.0")] pub struct Child { - handle: imp::Process, + pub(crate) handle: imp::Process, /// The handle for writing to the child's standard input (stdin), if it has /// been captured. To avoid partially moving diff --git a/library/std/src/sys/unix/process/process_common.rs b/library/std/src/sys/unix/process/process_common.rs index c5bdd1bda4a..f7a7a9968b8 100644 --- a/library/std/src/sys/unix/process/process_common.rs +++ b/library/std/src/sys/unix/process/process_common.rs @@ -79,6 +79,7 @@ pub struct Command { stdin: Option, stdout: Option, stderr: Option, + pub(crate) make_pidfd: bool, } // Create a new type for argv, so that we can make it `Send` and `Sync` @@ -141,6 +142,7 @@ impl Command { stdin: None, stdout: None, stderr: None, + make_pidfd: false, } } @@ -176,6 +178,10 @@ impl Command { pub fn groups(&mut self, groups: &[gid_t]) { self.groups = Some(Box::from(groups)); } + + pub fn create_pidfd(&mut self, val: bool) { + self.make_pidfd = val; + } pub fn saw_nul(&self) -> bool { self.saw_nul diff --git a/library/std/src/sys/unix/process/process_unix.rs b/library/std/src/sys/unix/process/process_unix.rs index c888dd0d87d..c1605faceb6 100644 --- a/library/std/src/sys/unix/process/process_unix.rs +++ b/library/std/src/sys/unix/process/process_unix.rs @@ -8,6 +8,10 @@ use crate::ptr; use crate::sys; use crate::sys::cvt; use crate::sys::process::process_common::*; +use crate::sync::atomic::{AtomicBool, Ordering}; + 
+#[cfg(target_os = "linux")] +use crate::sys::weak::syscall; #[cfg(any( target_os = "macos", @@ -61,7 +65,8 @@ impl Command { // a lock any more because the parent won't do anything and the child is // in its own process. Thus the parent drops the lock guard while the child // forgets it to avoid unlocking it on a new thread, which would be invalid. - let (env_lock, pid) = unsafe { (sys::os::env_read_lock(), cvt(libc::fork())?) }; + let env_lock = sys::os::env_read_lock(); + let (pid, pidfd) = self.do_fork()?; if pid == 0 { crate::panic::always_abort(); @@ -90,7 +95,7 @@ impl Command { drop(env_lock); drop(output); - let mut p = Process { pid, status: None }; + let mut p = Process { pid, status: None, pidfd }; let mut bytes = [0; 8]; // loop to handle EINTR @@ -122,6 +127,85 @@ impl Command { } } + // Attempts to fork the process. On success returns `Ok((0, -1))` in the child and + // `Ok((child_pid, child_pidfd))` in the parent (a pid returned by `clone3` fits in `pid_t`). + fn do_fork(&mut self) -> Result<(libc::pid_t, libc::pid_t), io::Error> { + // If we fail to create a pidfd for any reason, this will + // stay as -1, which indicates an error + let mut pidfd: libc::pid_t = -1; + + // On Linux, attempt to use the `clone3` syscall, which + // supports more arguments (in particular, the ability to create a pidfd). + // If this fails, we will fall through this block to a call to `fork()` + cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + static HAS_CLONE3: AtomicBool = AtomicBool::new(true); + + const CLONE_PIDFD: u64 = 0x00001000; + + #[repr(C)] + struct clone_args { + flags: u64, + pidfd: u64, + child_tid: u64, + parent_tid: u64, + exit_signal: u64, + stack: u64, + stack_size: u64, + tls: u64, + set_tid: u64, + set_tid_size: u64, + cgroup: u64, + } + + syscall! 
{ + fn clone3(cl_args: *mut clone_args, len: libc::size_t) -> libc::c_long + } + + if HAS_CLONE3.load(Ordering::Relaxed) { + let mut flags = 0; + if self.make_pidfd { + flags |= CLONE_PIDFD; + } + + let mut args = clone_args { + flags, + pidfd: &mut pidfd as *mut libc::pid_t as u64, + child_tid: 0, + parent_tid: 0, + exit_signal: libc::SIGCHLD as u64, + stack: 0, + stack_size: 0, + tls: 0, + set_tid: 0, + set_tid_size: 0, + cgroup: 0 + }; + + let args_ptr = &mut args as *mut clone_args; + let args_size = crate::mem::size_of::<clone_args>(); + + let res = cvt(unsafe { clone3(args_ptr, args_size) }); + match res { + Ok(n) => return Ok((n as libc::pid_t, pidfd)), + Err(e) => match e.raw_os_error() { + // `ENOSYS`: no `clone3` in this kernel; `EPERM`: blocked, e.g. by seccomp. + // Racing stores are harmless: each thread just failed the same syscall. + Some(libc::ENOSYS) | Some(libc::EPERM) => { + HAS_CLONE3.store(false, Ordering::Relaxed); + } + _ => return Err(e) + } + } + } + } + } + // If we get here, we are either not on Linux, + // or we are on Linux and the `clone3` syscall is unavailable + cvt(unsafe { libc::fork() }).map(|res| (res, pidfd)) + } + + pub fn exec(&mut self, default: Stdio) -> io::Error { let envp = self.capture_env(); @@ -276,8 +360,6 @@ impl Command { #[cfg(not(any( target_os = "macos", target_os = "freebsd", - all(target_os = "linux", target_env = "gnu"), - all(target_os = "linux", target_env = "musl"), )))] fn posix_spawn( &mut self, @@ -292,8 +374,6 @@ impl Command { #[cfg(any( target_os = "macos", target_os = "freebsd", - all(target_os = "linux", target_env = "gnu"), - all(target_os = "linux", target_env = "musl"), ))] fn posix_spawn( &mut self, @@ -441,6 +521,12 @@ impl Command { pub struct Process { pid: pid_t, status: Option, + // On Linux, stores the pidfd created for this child. + // This is -1 if the user did not request pidfd creation, + // or if the pidfd could not be created for some reason + // (e.g. 
the `clone3` syscall was not available). + #[cfg(target_os = "linux")] + pidfd: libc::c_int, } impl Process { @@ -580,6 +666,18 @@ impl ExitStatusError { } } +#[cfg(target_os = "linux")] +#[unstable(feature = "linux_pidfd", issue = "none")] +impl crate::os::linux::process::ChildExt for crate::process::Child { + fn pidfd(&self) -> crate::io::Result<libc::c_int> { + if self.handle.pidfd >= 0 { + Ok(self.handle.pidfd) + } else { + Err(crate::io::Error::from(crate::io::ErrorKind::Other)) + } + } +} + #[cfg(test)] #[path = "process_unix/tests.rs"] mod tests; diff --git a/src/test/ui/command/command-create-pidfd.rs b/src/test/ui/command/command-create-pidfd.rs new file mode 100644 index 00000000000..248ae3457d7 --- /dev/null +++ b/src/test/ui/command/command-create-pidfd.rs @@ -0,0 +1,27 @@ +// run-pass +// only-linux - pidfds are a linux-specific concept + +#![feature(linux_pidfd)] +use std::os::linux::process::{CommandExt, ChildExt}; +use std::process::Command; + +fn main() { + // We don't assert the precise value, since the standard library + // may have opened other file descriptors before our code ran. + let _ = Command::new("echo") + .create_pidfd(true) + .spawn() + .unwrap() + .pidfd().expect("failed to obtain pidfd"); + + let _ = Command::new("echo") + .create_pidfd(false) + .spawn() + .unwrap() + .pidfd().expect_err("pidfd should not have been created when create_pidfd(false) is set"); + + let _ = Command::new("echo") + .spawn() + .unwrap() + .pidfd().expect_err("pidfd should not have been created"); +}