From 6016bad0636f3597851bf4d8e2b7262932dfb777 Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Fri, 17 May 2024 22:11:50 +0200 Subject: [PATCH 1/4] Use `_NSGetEnviron` instead of `environ` on iOS/tvOS/watchOS/visionOS This should be slightly more correct, and matches the implementation in other programming languages: - [Python's `os.environ`](https://github.com/python/cpython/blob/v3.12.3/Modules/posixmodule.c#L1562-L1566). - [Swift's `Darwin.environ`](https://github.com/apple/swift-corelibs-foundation/blob/swift-5.10-RELEASE/CoreFoundation/Base.subproj/CFPlatform.c#L1811-L1812), though that library is bundled on the system, so they can change it if they want. - [Dart/Flutter](https://github.com/dart-lang/sdk/blob/3.4.0/runtime/bin/platform_macos.cc#L205-L234), doesn't support environment variables on iOS. - Node seems to not be entirely consistent with it: - [`process.c`](https://github.com/nodejs/node/blob/v22.1.0/deps/uv/src/unix/process.c#L38). - [`unix/core.c`](https://github.com/nodejs/node/blob/v22.1.0/deps/uv/src/unix/core.c#L59). - [.NET/Xamarin](https://github.com/dotnet/runtime/blob/v8.0.5/src/native/libs/configure.cmake#L1099-L1106). - [OpenJDK](https://github.com/openjdk/jdk/blob/jdk-23%2B22/src/java.base/unix/native/libjava/ProcessEnvironment_md.c#L31-L33). --- library/std/src/sys/pal/unix/os.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/library/std/src/sys/pal/unix/os.rs b/library/std/src/sys/pal/unix/os.rs index 3a281525f8d..8afc49f5227 100644 --- a/library/std/src/sys/pal/unix/os.rs +++ b/library/std/src/sys/pal/unix/os.rs @@ -576,12 +576,36 @@ fn size_hint(&self) -> (usize, Option) { } } -#[cfg(target_os = "macos")] +// Use `_NSGetEnviron` on Apple platforms. +// +// `_NSGetEnviron` is the documented alternative (see `man environ`), and has +// been available since the first versions of both macOS and iOS. 
+// +// Nowadays, specifically since macOS 10.8, `environ` has been exposed through +// `libdyld.dylib`, which is linked via `libSystem.dylib`: +// +// +// So in the end, it likely doesn't really matter which option we use, but the +// performance cost of using `_NSGetEnviron` is extremely minuscule, and it +// might be ever so slightly more supported, so let's just use that. +// +// NOTE: The header where this is defined (`crt_externs.h`) was added to the +// iOS 13.0 SDK, which has been the source of a great deal of confusion in the +// past about the availability of this API. +// +// NOTE(madsmtm): Neither this nor using `environ` has been verified to not +// cause App Store rejections; if this is found to be the case, an alternative +// implementation of this is possible using `[NSProcessInfo environment]` +// - which internally uses `_NSGetEnviron` and a system-wide lock on the +// environment variables to protect against `setenv`, so using that might be +// desirable anyhow? Though it also means that we have to link to Foundation. +#[cfg(target_vendor = "apple")] pub unsafe fn environ() -> *mut *const *const c_char { libc::_NSGetEnviron() as *mut *const *const c_char } -#[cfg(not(target_os = "macos"))] +// Use the `environ` static which is part of POSIX. +#[cfg(not(target_vendor = "apple"))] pub unsafe fn environ() -> *mut *const *const c_char { extern "C" { static mut environ: *const *const c_char; From 8f18e4fe4bd8cce290c42b662e8db6897b47582d Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Fri, 17 May 2024 22:01:54 +0200 Subject: [PATCH 2/4] Use `_NSGetArgc`/`_NSGetArgv` on iOS/tvOS/watchOS/visionOS If we're comfortable using `_NSGetEnviron` from `crt_externs.h`, there shouldn't be an issue with using these either, and then we can merge with the macOS implementation. 
This also fixes two test cases on Mac Catalyst: - `tests/ui/command/command-argv0.rs`, maybe because `[[NSProcessInfo processInfo] arguments]` somehow converts the name of the first argument? - `tests/ui/env-funky-keys.rs` since we no longer link to Foundation. --- library/std/src/sys/pal/unix/args.rs | 92 +++++----------------------- library/std/src/sys/pal/unix/mod.rs | 11 ++-- 2 files changed, 19 insertions(+), 84 deletions(-) diff --git a/library/std/src/sys/pal/unix/args.rs b/library/std/src/sys/pal/unix/args.rs index 2a3298e8b4c..66ee45be1a9 100644 --- a/library/std/src/sys/pal/unix/args.rs +++ b/library/std/src/sys/pal/unix/args.rs @@ -168,16 +168,28 @@ fn clone() -> Vec { } } +// Use `_NSGetArgc` and `_NSGetArgv` on Apple platforms. +// +// Even though these have underscores in their names, they've been available +// since the first versions of both macOS and iOS, and are declared in +// the header `crt_externs.h`. +// +// NOTE: This header was added to the iOS 13.0 SDK, which has been the source +// of a great deal of confusion in the past about the availability of these +// APIs. +// +// NOTE(madsmtm): This has not strictly been verified to not cause App Store +// rejections; if this is found to be the case, the previous implementation +// of this used `[[NSProcessInfo processInfo] arguments]`. #[cfg(target_vendor = "apple")] mod imp { use super::Args; use crate::ffi::CStr; + use crate::os::unix::prelude::*; pub unsafe fn init(_argc: isize, _argv: *const *const u8) {} - #[cfg(target_os = "macos")] pub fn args() -> Args { - use crate::os::unix::prelude::*; extern "C" { // These functions are in crt_externs.h. fn _NSGetArgc() -> *mut libc::c_int; @@ -196,82 +208,6 @@ pub fn args() -> Args { }; Args { iter: vec.into_iter() } } - - // As _NSGetArgc and _NSGetArgv aren't mentioned in iOS docs - // and use underscores in their names - they're most probably - // are considered private and therefore should be avoided. 
- // Here is another way to get arguments using the Objective-C - // runtime. - // - // In general it looks like: - // res = Vec::new() - // let args = [[NSProcessInfo processInfo] arguments] - // for i in (0..[args count]) - // res.push([args objectAtIndex:i]) - // res - #[cfg(not(target_os = "macos"))] - pub fn args() -> Args { - use crate::ffi::{c_char, c_void, OsString}; - use crate::mem; - use crate::str; - - type Sel = *const c_void; - type NsId = *const c_void; - type NSUInteger = usize; - - extern "C" { - fn sel_registerName(name: *const c_char) -> Sel; - fn objc_getClass(class_name: *const c_char) -> NsId; - - // This must be transmuted to an appropriate function pointer type before being called. - fn objc_msgSend(); - } - - const MSG_SEND_PTR: unsafe extern "C" fn() = objc_msgSend; - const MSG_SEND_NO_ARGUMENTS_RETURN_PTR: unsafe extern "C" fn(NsId, Sel) -> *const c_void = - unsafe { mem::transmute(MSG_SEND_PTR) }; - const MSG_SEND_NO_ARGUMENTS_RETURN_NSUINTEGER: unsafe extern "C" fn( - NsId, - Sel, - ) -> NSUInteger = unsafe { mem::transmute(MSG_SEND_PTR) }; - const MSG_SEND_NSINTEGER_ARGUMENT_RETURN_PTR: unsafe extern "C" fn( - NsId, - Sel, - NSUInteger, - ) - -> *const c_void = unsafe { mem::transmute(MSG_SEND_PTR) }; - - let mut res = Vec::new(); - - unsafe { - let process_info_sel = sel_registerName(c"processInfo".as_ptr()); - let arguments_sel = sel_registerName(c"arguments".as_ptr()); - let count_sel = sel_registerName(c"count".as_ptr()); - let object_at_index_sel = sel_registerName(c"objectAtIndex:".as_ptr()); - let utf8string_sel = sel_registerName(c"UTF8String".as_ptr()); - - let klass = objc_getClass(c"NSProcessInfo".as_ptr()); - // `+[NSProcessInfo processInfo]` returns an object with +0 retain count, so no need to manually `retain/release`. - let info = MSG_SEND_NO_ARGUMENTS_RETURN_PTR(klass, process_info_sel); - - // `-[NSProcessInfo arguments]` returns an object with +0 retain count, so no need to manually `retain/release`. 
- let args = MSG_SEND_NO_ARGUMENTS_RETURN_PTR(info, arguments_sel); - - let cnt = MSG_SEND_NO_ARGUMENTS_RETURN_NSUINTEGER(args, count_sel); - for i in 0..cnt { - // `-[NSArray objectAtIndex:]` returns an object whose lifetime is tied to the array, so no need to manually `retain/release`. - let ns_string = - MSG_SEND_NSINTEGER_ARGUMENT_RETURN_PTR(args, object_at_index_sel, i); - // The lifetime of this pointer is tied to the NSString, as well as the current autorelease pool, which is why we heap-allocate the string below. - let utf_c_str: *const c_char = - MSG_SEND_NO_ARGUMENTS_RETURN_PTR(ns_string, utf8string_sel).cast(); - let bytes = CStr::from_ptr(utf_c_str).to_bytes(); - res.push(OsString::from(str::from_utf8(bytes).unwrap())) - } - } - - Args { iter: res.into_iter() } - } } #[cfg(any(target_os = "espidf", target_os = "vita"))] diff --git a/library/std/src/sys/pal/unix/mod.rs b/library/std/src/sys/pal/unix/mod.rs index 21f233e2262..735ed96bc7b 100644 --- a/library/std/src/sys/pal/unix/mod.rs +++ b/library/std/src/sys/pal/unix/mod.rs @@ -399,14 +399,13 @@ pub fn abort_internal() -> ! { // Use libumem for the (malloc-compatible) allocator #[link(name = "umem")] extern "C" {} - } else if #[cfg(target_os = "macos")] { + } else if #[cfg(target_vendor = "apple")] { + // Link to `libSystem.dylib`. + // + // Don't get confused by the presence of `System.framework`, + // it is a deprecated wrapper over the dynamic library. 
#[link(name = "System")] extern "C" {} - } else if #[cfg(all(target_vendor = "apple", not(target_os = "macos")))] { - #[link(name = "System")] - #[link(name = "objc")] - #[link(name = "Foundation", kind = "framework")] - extern "C" {} } else if #[cfg(target_os = "fuchsia")] { #[link(name = "zircon")] #[link(name = "fdio")] From abd5d0e37bfe54543aa80c86de30db829dd10ed2 Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Sun, 19 May 2024 04:18:51 +0200 Subject: [PATCH 3/4] Add NULL check in argument parsing on Apple platforms --- library/std/src/sys/pal/unix/args.rs | 58 +++++++++++++++++++++------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/library/std/src/sys/pal/unix/args.rs b/library/std/src/sys/pal/unix/args.rs index 66ee45be1a9..b7e8534eea8 100644 --- a/library/std/src/sys/pal/unix/args.rs +++ b/library/std/src/sys/pal/unix/args.rs @@ -184,28 +184,58 @@ fn clone() -> Vec { #[cfg(target_vendor = "apple")] mod imp { use super::Args; - use crate::ffi::CStr; + use crate::ffi::{c_char, c_int, CStr}; use crate::os::unix::prelude::*; - pub unsafe fn init(_argc: isize, _argv: *const *const u8) {} + pub unsafe fn init(_argc: isize, _argv: *const *const u8) { + // No need to initialize anything in here, `libdyld.dylib` has already + // done the work for us. + } pub fn args() -> Args { extern "C" { // These functions are in crt_externs.h. - fn _NSGetArgc() -> *mut libc::c_int; - fn _NSGetArgv() -> *mut *mut *mut libc::c_char; + fn _NSGetArgc() -> *mut c_int; + fn _NSGetArgv() -> *mut *mut *mut c_char; + } + + // SAFETY: The returned pointer points to a static initialized early + // in the program lifetime by `libdyld.dylib`, and as such is always + // valid. + // + // NOTE: Similar to `_NSGetEnviron`, there technically isn't anything + // protecting us against concurrent modifications to this, and there + // doesn't exist a lock that we can take. 
Instead, it is generally + // expected that it's only modified in `main` / before other code + // runs, so reading this here should be fine. + let argc = unsafe { _NSGetArgc().read() }; + // SAFETY: Same as above. + let argv = unsafe { _NSGetArgv().read() }; + + let mut vec = Vec::with_capacity(argc as usize); + + for i in 0..argc { + // SAFETY: `argv` is at least as long as `argc`, so reading from + // it should be safe. + let ptr = unsafe { argv.offset(i as isize).read() }; + + // Entries may have been removed from `argv` by setting them to + // NULL, without updating `argc`. + if ptr.is_null() { + // We continue instead of break here, as an argument may have + // been set to `NULL` in the middle, instead of at the end of + // the list. + // + // This is the same as what `-[NSProcessInfo arguments]` does. + continue; + } + + // SAFETY: Just checked that the pointer is not NULL, and + // arguments are otherwise guaranteed to be valid C strings. + let cstr = unsafe { CStr::from_ptr(ptr) }; + vec.push(OsStringExt::from_vec(cstr.to_bytes().to_vec())); } - let vec = unsafe { - let (argc, argv) = - (*_NSGetArgc() as isize, *_NSGetArgv() as *const *const libc::c_char); - (0..argc as isize) - .map(|i| { - let bytes = CStr::from_ptr(*argv.offset(i)).to_bytes().to_vec(); - OsStringExt::from_vec(bytes) - }) - .collect::>() - }; Args { iter: vec.into_iter() } } } From 38ad85160314b446a08705974d3b03cc29c338c9 Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Mon, 20 May 2024 04:54:27 +0200 Subject: [PATCH 4/4] Make NULL check in argument parsing the same on all unix platforms --- library/std/src/sys/pal/unix/args.rs | 138 ++++++++++++--------------- 1 file changed, 63 insertions(+), 75 deletions(-) diff --git a/library/std/src/sys/pal/unix/args.rs b/library/std/src/sys/pal/unix/args.rs index b7e8534eea8..db2ec73148e 100644 --- a/library/std/src/sys/pal/unix/args.rs +++ b/library/std/src/sys/pal/unix/args.rs @@ -5,8 +5,9 @@ #![allow(dead_code)] // runtime init functions not 
used during testing -use crate::ffi::OsString; +use crate::ffi::{CStr, OsString}; use crate::fmt; +use crate::os::unix::ffi::OsStringExt; use crate::vec; /// One-time global initialization. @@ -16,7 +17,46 @@ pub unsafe fn init(argc: isize, argv: *const *const u8) { /// Returns the command line arguments pub fn args() -> Args { - imp::args() + let (argc, argv) = imp::argc_argv(); + + let mut vec = Vec::with_capacity(argc as usize); + + for i in 0..argc { + // SAFETY: `argv` is non-null if `argc` is positive, and it is + // guaranteed to be at least as long as `argc`, so reading from it + // should be safe. + let ptr = unsafe { argv.offset(i).read() }; + + // Some C commandline parsers (e.g. GLib and Qt) are replacing already + // handled arguments in `argv` with `NULL` and move them to the end. + // + // Since they can't directly ensure updates to `argc` as well, this + // means that `argc` might be bigger than the actual number of + // non-`NULL` pointers in `argv` at this point. + // + // To handle this we simply stop iterating at the first `NULL` + // argument. `argv` is also guaranteed to be `NULL`-terminated so any + // non-`NULL` arguments after the first `NULL` can safely be ignored. + if ptr.is_null() { + // NOTE: On Apple platforms, `-[NSProcessInfo arguments]` does not + // stop iterating here, but instead `continue`, always iterating + // up until it reached `argc`. + // + // This difference will only matter in very specific circumstances + // where `argc`/`argv` have been modified, but in unexpected ways, + // so it likely doesn't really matter which option we choose. + // See the following PR for further discussion: + // + break; + } + + // SAFETY: Just checked that the pointer is not NULL, and arguments + // are otherwise guaranteed to be valid C strings. 
+ let cstr = unsafe { CStr::from_ptr(ptr) }; + vec.push(OsStringExt::from_vec(cstr.to_bytes().to_vec())); + } + + Args { iter: vec.into_iter() } } pub struct Args { @@ -75,9 +115,7 @@ fn next_back(&mut self) -> Option { target_os = "hurd", ))] mod imp { - use super::Args; - use crate::ffi::{CStr, OsString}; - use crate::os::unix::prelude::*; + use crate::ffi::c_char; use crate::ptr; use crate::sync::atomic::{AtomicIsize, AtomicPtr, Ordering}; @@ -126,45 +164,19 @@ extern "C" fn init_wrapper( init_wrapper }; - pub fn args() -> Args { - Args { iter: clone().into_iter() } - } + pub fn argc_argv() -> (isize, *const *const c_char) { + // Load ARGC and ARGV, which hold the unmodified system-provided + // argc/argv, so we can read the pointed-to memory without atomics or + // synchronization. + // + // If either ARGC or ARGV is still zero or null, then either there + // really are no arguments, or someone is asking for `args()` before + // initialization has completed, and we return an empty list. + let argv = ARGV.load(Ordering::Relaxed); + let argc = if argv.is_null() { 0 } else { ARGC.load(Ordering::Relaxed) }; - fn clone() -> Vec { - unsafe { - // Load ARGC and ARGV, which hold the unmodified system-provided - // argc/argv, so we can read the pointed-to memory without atomics - // or synchronization. - // - // If either ARGC or ARGV is still zero or null, then either there - // really are no arguments, or someone is asking for `args()` - // before initialization has completed, and we return an empty - // list. - let argv = ARGV.load(Ordering::Relaxed); - let argc = if argv.is_null() { 0 } else { ARGC.load(Ordering::Relaxed) }; - let mut args = Vec::with_capacity(argc as usize); - for i in 0..argc { - let ptr = *argv.offset(i) as *const libc::c_char; - - // Some C commandline parsers (e.g. GLib and Qt) are replacing already - // handled arguments in `argv` with `NULL` and move them to the end. 
That - // means that `argc` might be bigger than the actual number of non-`NULL` - // pointers in `argv` at this point. - // - // To handle this we simply stop iterating at the first `NULL` argument. - // - // `argv` is also guaranteed to be `NULL`-terminated so any non-`NULL` arguments - // after the first `NULL` can safely be ignored. - if ptr.is_null() { - break; - } - - let cstr = CStr::from_ptr(ptr); - args.push(OsStringExt::from_vec(cstr.to_bytes().to_vec())); - } - - args - } + // Cast from `*mut *const u8` to `*const *const c_char` + (argc, argv.cast()) } } @@ -183,16 +195,14 @@ fn clone() -> Vec { // of this used `[[NSProcessInfo processInfo] arguments]`. #[cfg(target_vendor = "apple")] mod imp { - use super::Args; - use crate::ffi::{c_char, c_int, CStr}; - use crate::os::unix::prelude::*; + use crate::ffi::{c_char, c_int}; pub unsafe fn init(_argc: isize, _argv: *const *const u8) { // No need to initialize anything in here, `libdyld.dylib` has already // done the work for us. } - pub fn args() -> Args { + pub fn argc_argv() -> (isize, *const *const c_char) { extern "C" { // These functions are in crt_externs.h. fn _NSGetArgc() -> *mut c_int; @@ -212,42 +222,20 @@ pub fn args() -> Args { // SAFETY: Same as above. let argv = unsafe { _NSGetArgv().read() }; - let mut vec = Vec::with_capacity(argc as usize); - - for i in 0..argc { - // SAFETY: `argv` is at least as long as `argc`, so reading from - // it should be safe. - let ptr = unsafe { argv.offset(i as isize).read() }; - - // Entries may have been removed from `argv` by setting them to - // NULL, without updating `argc`. - if ptr.is_null() { - // We continue instead of break here, as an argument may have - // been set to `NULL` in the middle, instead of at the end of - // the list. - // - // This is the same as what `-[NSProcessInfo arguments]` does. - continue; - } - - // SAFETY: Just checked that the pointer is not NULL, and - // arguments are otherwise guaranteed to be valid C strings. 
- let cstr = unsafe { CStr::from_ptr(ptr) }; - vec.push(OsStringExt::from_vec(cstr.to_bytes().to_vec())); - } - - Args { iter: vec.into_iter() } + // Cast from `*mut *mut c_char` to `*const *const c_char` + (argc as isize, argv.cast()) } } #[cfg(any(target_os = "espidf", target_os = "vita"))] mod imp { - use super::Args; + use crate::ffi::c_char; + use crate::ptr; #[inline(always)] pub unsafe fn init(_argc: isize, _argv: *const *const u8) {} - pub fn args() -> Args { - Args { iter: Vec::new().into_iter() } + pub fn argc_argv() -> (isize, *const *const c_char) { + (0, ptr::null()) } }