From e8f9d1a80f1e21dd3b4198c35b9596fb62e8786d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 3 Aug 2023 19:13:08 -0400 Subject: [PATCH 1/2] Refactor the const `strlen` implementation to `const_strlen` Currently, `CStr::from_ptr` contains its own implementation of `strlen` that uses `const_eval_select` to either call libc's `strlen` or use a naive Rust implementation. Refactor that into its own function so we can use it elsewhere in the module. --- library/core/src/ffi/c_str.rs | 78 ++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index 163a65c909e..801447e96cd 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -214,6 +214,8 @@ impl CStr { /// * The memory referenced by the returned `CStr` must not be mutated for /// the duration of lifetime `'a`. /// + /// * The nul terminator must be within `isize::MAX` from `ptr`. + /// /// > **Note**: This operation is intended to be a 0-cost cast but it is /// > currently implemented with an up-front calculation of the length of /// > the string. This is not guaranteed to always be the case. @@ -259,42 +261,16 @@ impl CStr { #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")] pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr { // SAFETY: The caller has provided a pointer that points to a valid C - // string with a NUL terminator of size less than `isize::MAX`, whose - // content remain valid and doesn't change for the lifetime of the - // returned `CStr`. - // - // Thus computing the length is fine (a NUL byte exists), the call to - // from_raw_parts is safe because we know the length is at most `isize::MAX`, meaning - // the call to `from_bytes_with_nul_unchecked` is correct. + // string with a NUL terminator less than `isize::MAX` from `ptr`.
+ let len = unsafe { const_strlen(ptr) }; + + // SAFETY: The caller has provided a valid pointer with length less than + // `isize::MAX`, so `from_raw_parts` is safe. The content remains valid + // and doesn't change for the lifetime of the returned `CStr`. This + // means the call to `from_bytes_with_nul_unchecked` is correct. // // The cast from c_char to u8 is ok because a c_char is always one byte. - unsafe { - const fn strlen_ct(s: *const c_char) -> usize { - let mut len = 0; - - // SAFETY: Outer caller has provided a pointer to a valid C string. - while unsafe { *s.add(len) } != 0 { - len += 1; - } - - len - } - - // `inline` is necessary for codegen to see strlen. - #[inline] - fn strlen_rt(s: *const c_char) -> usize { - extern "C" { - /// Provided by libc or compiler_builtins. - fn strlen(s: *const c_char) -> usize; - } - - // SAFETY: Outer caller has provided a pointer to a valid C string. - unsafe { strlen(s) } - } - - let len = intrinsics::const_eval_select((ptr,), strlen_ct, strlen_rt); - Self::from_bytes_with_nul_unchecked(slice::from_raw_parts(ptr.cast(), len + 1)) - } + unsafe { Self::from_bytes_with_nul_unchecked(slice::from_raw_parts(ptr.cast(), len + 1)) } } /// Creates a C string wrapper from a byte slice with any number of nuls. @@ -681,3 +657,37 @@ impl AsRef<CStr> for CStr { self } } + +/// Calculate the length of a nul-terminated string. Defers to C's `strlen` when possible. +/// +/// # Safety +/// +/// The pointer must point to a valid buffer that contains a NUL terminator. The NUL must be +/// located within `isize::MAX` from `ptr`. +#[inline] +const unsafe fn const_strlen(ptr: *const c_char) -> usize { + const fn strlen_ct(s: *const c_char) -> usize { + let mut len = 0; + + // SAFETY: Outer caller has provided a pointer to a valid C string. + while unsafe { *s.add(len) } != 0 { + len += 1; + } + + len + } + + #[inline] + fn strlen_rt(s: *const c_char) -> usize { + extern "C" { + /// Provided by libc or compiler_builtins.
+ fn strlen(s: *const c_char) -> usize; + } + + // SAFETY: Outer caller has provided a pointer to a valid C string. + unsafe { strlen(s) } + } + + // SAFETY: the two functions always provide equivalent functionality + unsafe { intrinsics::const_eval_select((ptr,), strlen_ct, strlen_rt) } +} From fe0eb8b49bd31cb956d15ac61ff6bee5b17f59ed Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 3 Aug 2023 19:38:43 -0400 Subject: [PATCH 2/2] Implement `CStr::count_bytes` This is feature gated under `cstr_count_bytes` and provides a more straightforward way to access the length of a `CStr` Link: https://github.com/rust-lang/rust/issues/113219 --- library/core/src/ffi/c_str.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index 801447e96cd..5592fe8e324 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -491,6 +491,34 @@ impl CStr { self.inner.as_ptr() } + /// Returns the length of `self`. Like C's `strlen`, this does not include the nul terminator. + /// + /// > **Note**: This method is currently implemented as a constant-time + /// > cast, but it is planned to alter its definition in the future to + /// > perform the length calculation whenever this method is called. + /// + /// # Examples + /// + /// ``` + /// #![feature(cstr_count_bytes)] + /// + /// use std::ffi::CStr; + /// + /// let cstr = CStr::from_bytes_with_nul(b"foo\0").unwrap(); + /// assert_eq!(cstr.count_bytes(), 3); + /// + /// let cstr = CStr::from_bytes_with_nul(b"\0").unwrap(); + /// assert_eq!(cstr.count_bytes(), 0); + /// ``` + #[inline] + #[must_use] + #[doc(alias("len", "strlen"))] + #[unstable(feature = "cstr_count_bytes", issue = "114441")] + #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")] + pub const fn count_bytes(&self) -> usize { + self.inner.len() - 1 + } + /// Returns `true` if `self.to_bytes()` has a length of 0. /// /// # Examples