Auto merge of #121271 - matthiaskrgr:rollup-56ru17w, r=matthiaskrgr

Rollup of 7 pull requests

Successful merges:

 - #118569 (Move `OsStr::slice_encoded_bytes` validation to platform modules)
 - #121067 (make "invalid fragment specifier" translatable)
 - #121224 (Remove unnecessary unit binding)
 - #121247 (Add help to `hir_analysis_unrecognized_intrinsic_function`)
 - #121257 (remove extraneous text from example config)
 - #121260 (Remove const_prop.rs)
 - #121266 (Add uncontroversial syscall doc aliases to std docs)

r? `@ghost`
`@rustbot` modify labels: rollup
This commit is contained in:
bors 2024-02-18 18:52:11 +00:00
commit 8a497723e3
29 changed files with 473 additions and 248 deletions

View File

@ -61,6 +61,11 @@ expand_invalid_cfg_multiple_predicates = multiple `cfg` predicates are specified
expand_invalid_cfg_no_parens = `cfg` is not followed by parentheses
expand_invalid_cfg_no_predicate = `cfg` predicate is not specified
expand_invalid_cfg_predicate_literal = `cfg` predicate key cannot be a literal
expand_invalid_fragment_specifier =
invalid fragment specifier `{$fragment}`
.help = {$help}
expand_macro_body_stability =
macros cannot have body stability attributes
.label = invalid body stability attribute

View File

@ -408,3 +408,13 @@ pub struct DuplicateMatcherBinding {
#[label(expand_label2)]
pub prev: Span,
}
#[derive(Diagnostic)]
#[diag(expand_invalid_fragment_specifier)]
#[help]
pub struct InvalidFragmentSpecifier {
#[primary_span]
pub span: Span,
pub fragment: Ident,
pub help: String,
}

View File

@ -1,3 +1,4 @@
use crate::errors;
use crate::mbe::macro_parser::count_metavar_decls;
use crate::mbe::{Delimited, KleeneOp, KleeneToken, MetaVarExpr, SequenceRepetition, TokenTree};
@ -60,11 +61,11 @@ pub(super) fn parse(
Some(&tokenstream::TokenTree::Token(Token { kind: token::Colon, span }, _)) => {
match trees.next() {
Some(tokenstream::TokenTree::Token(token, _)) => match token.ident() {
Some((frag, _)) => {
Some((fragment, _)) => {
let span = token.span.with_lo(start_sp.lo());
let kind =
token::NonterminalKind::from_symbol(frag.name, || {
token::NonterminalKind::from_symbol(fragment.name, || {
// FIXME(#85708) - once we properly decode a foreign
// crate's `SyntaxContext::root`, then we can replace
// this with just `span.edition()`. A
@ -81,14 +82,13 @@ pub(super) fn parse(
})
.unwrap_or_else(
|| {
let msg = format!(
"invalid fragment specifier `{}`",
frag.name
sess.dcx().emit_err(
errors::InvalidFragmentSpecifier {
span,
fragment,
help: VALID_FRAGMENT_NAMES_MSG.into(),
},
);
sess.dcx()
.struct_span_err(span, msg)
.with_help(VALID_FRAGMENT_NAMES_MSG)
.emit();
token::NonterminalKind::Ident
},
);

View File

@ -469,6 +469,7 @@ hir_analysis_unrecognized_atomic_operation =
hir_analysis_unrecognized_intrinsic_function =
unrecognized intrinsic function: `{$name}`
.label = unrecognized intrinsic
.help = if you're adding an intrinsic, be sure to update `check_intrinsic_type`
hir_analysis_unused_associated_type_bounds =
unnecessary associated type bound for not object safe associated type

View File

@ -143,6 +143,7 @@ pub struct WrongNumberOfGenericArgumentsToIntrinsic<'a> {
#[derive(Diagnostic)]
#[diag(hir_analysis_unrecognized_intrinsic_function, code = E0093)]
#[help]
pub struct UnrecognizedIntrinsicFunction {
#[primary_span]
#[label]

View File

@ -1,161 +0,0 @@
//! Propagates constants for early reporting of statically known
//! assertion failures
use rustc_index::bit_set::BitSet;
use rustc_index::IndexVec;
use rustc_middle::mir::visit::{MutatingUseContext, NonMutatingUseContext, PlaceContext, Visitor};
use rustc_middle::mir::*;
use rustc_middle::ty::{ParamEnv, TyCtxt};
use rustc_target::abi::Size;
/// The maximum number of bytes that we'll allocate space for a local or the return value.
/// Needed for #66397, because otherwise we eval into large places and that can cause OOM or just
/// Severely regress performance.
const MAX_ALLOC_LIMIT: u64 = 1024;
/// Macro for machine-specific `InterpError` without allocation.
/// (These will never be shown to the user, but they help diagnose ICEs.)
pub(crate) macro throw_machine_stop_str($($tt:tt)*) {{
// We make a new local type for it. The type itself does not carry any information,
// but its vtable (for the `MachineStopType` trait) does.
#[derive(Debug)]
struct Zst;
// Printing this type shows the desired string.
impl std::fmt::Display for Zst {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, $($tt)*)
}
}
impl rustc_middle::mir::interpret::MachineStopType for Zst {
fn diagnostic_message(&self) -> rustc_errors::DiagnosticMessage {
self.to_string().into()
}
fn add_args(
self: Box<Self>,
_: &mut dyn FnMut(rustc_errors::DiagnosticArgName, rustc_errors::DiagnosticArgValue),
) {}
}
throw_machine_stop!(Zst)
}}
/// The mode that `ConstProp` is allowed to run in for a given `Local`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum ConstPropMode {
/// The `Local` can be propagated into and reads of this `Local` can also be propagated.
FullConstProp,
/// The `Local` can only be propagated into and from its own block.
OnlyInsideOwnBlock,
/// The `Local` cannot be part of propagation at all. Any statement
/// referencing it either for reading or writing will not get propagated.
NoPropagation,
}
pub struct CanConstProp {
can_const_prop: IndexVec<Local, ConstPropMode>,
// False at the beginning. Once set, no more assignments are allowed to that local.
found_assignment: BitSet<Local>,
}
impl CanConstProp {
/// Returns true if `local` can be propagated
pub fn check<'tcx>(
tcx: TyCtxt<'tcx>,
param_env: ParamEnv<'tcx>,
body: &Body<'tcx>,
) -> IndexVec<Local, ConstPropMode> {
let mut cpv = CanConstProp {
can_const_prop: IndexVec::from_elem(ConstPropMode::FullConstProp, &body.local_decls),
found_assignment: BitSet::new_empty(body.local_decls.len()),
};
for (local, val) in cpv.can_const_prop.iter_enumerated_mut() {
let ty = body.local_decls[local].ty;
match tcx.layout_of(param_env.and(ty)) {
Ok(layout) if layout.size < Size::from_bytes(MAX_ALLOC_LIMIT) => {}
// Either the layout fails to compute, then we can't use this local anyway
// or the local is too large, then we don't want to.
_ => {
*val = ConstPropMode::NoPropagation;
continue;
}
}
}
// Consider that arguments are assigned on entry.
for arg in body.args_iter() {
cpv.found_assignment.insert(arg);
}
cpv.visit_body(body);
cpv.can_const_prop
}
}
impl<'tcx> Visitor<'tcx> for CanConstProp {
fn visit_place(&mut self, place: &Place<'tcx>, mut context: PlaceContext, loc: Location) {
use rustc_middle::mir::visit::PlaceContext::*;
// Dereferencing just read the addess of `place.local`.
if place.projection.first() == Some(&PlaceElem::Deref) {
context = NonMutatingUse(NonMutatingUseContext::Copy);
}
self.visit_local(place.local, context, loc);
self.visit_projection(place.as_ref(), context, loc);
}
fn visit_local(&mut self, local: Local, context: PlaceContext, _: Location) {
use rustc_middle::mir::visit::PlaceContext::*;
match context {
// These are just stores, where the storing is not propagatable, but there may be later
// mutations of the same local via `Store`
| MutatingUse(MutatingUseContext::Call)
| MutatingUse(MutatingUseContext::AsmOutput)
| MutatingUse(MutatingUseContext::Deinit)
// Actual store that can possibly even propagate a value
| MutatingUse(MutatingUseContext::Store)
| MutatingUse(MutatingUseContext::SetDiscriminant) => {
if !self.found_assignment.insert(local) {
match &mut self.can_const_prop[local] {
// If the local can only get propagated in its own block, then we don't have
// to worry about multiple assignments, as we'll nuke the const state at the
// end of the block anyway, and inside the block we overwrite previous
// states as applicable.
ConstPropMode::OnlyInsideOwnBlock => {}
ConstPropMode::NoPropagation => {}
other @ ConstPropMode::FullConstProp => {
trace!(
"local {:?} can't be propagated because of multiple assignments. Previous state: {:?}",
local, other,
);
*other = ConstPropMode::OnlyInsideOwnBlock;
}
}
}
}
// Reading constants is allowed an arbitrary number of times
NonMutatingUse(NonMutatingUseContext::Copy)
| NonMutatingUse(NonMutatingUseContext::Move)
| NonMutatingUse(NonMutatingUseContext::Inspect)
| NonMutatingUse(NonMutatingUseContext::PlaceMention)
| NonUse(_) => {}
// These could be propagated with a smarter analysis or just some careful thinking about
// whether they'd be fine right now.
MutatingUse(MutatingUseContext::Yield)
| MutatingUse(MutatingUseContext::Drop)
| MutatingUse(MutatingUseContext::Retag)
// These can't ever be propagated under any scheme, as we can't reason about indirect
// mutation.
| NonMutatingUse(NonMutatingUseContext::SharedBorrow)
| NonMutatingUse(NonMutatingUseContext::FakeBorrow)
| NonMutatingUse(NonMutatingUseContext::AddressOf)
| MutatingUse(MutatingUseContext::Borrow)
| MutatingUse(MutatingUseContext::AddressOf) => {
trace!("local {:?} can't be propagated because it's used: {:?}", local, context);
self.can_const_prop[local] = ConstPropMode::NoPropagation;
}
MutatingUse(MutatingUseContext::Projection)
| NonMutatingUse(NonMutatingUseContext::Projection) => bug!("visit_place should not pass {context:?} for {local:?}"),
}
}
}

View File

@ -9,17 +9,14 @@ use rustc_const_eval::interpret::{
use rustc_data_structures::fx::FxHashSet;
use rustc_hir::def::DefKind;
use rustc_hir::HirId;
use rustc_index::bit_set::BitSet;
use rustc_index::{Idx, IndexVec};
use rustc_middle::mir::visit::Visitor;
use rustc_index::{bit_set::BitSet, Idx, IndexVec};
use rustc_middle::mir::visit::{MutatingUseContext, NonMutatingUseContext, PlaceContext, Visitor};
use rustc_middle::mir::*;
use rustc_middle::ty::layout::{LayoutError, LayoutOf, LayoutOfHelpers, TyAndLayout};
use rustc_middle::ty::{self, ConstInt, ParamEnv, ScalarInt, Ty, TyCtxt, TypeVisitableExt};
use rustc_span::Span;
use rustc_target::abi::{Abi, FieldIdx, HasDataLayout, Size, TargetDataLayout, VariantIdx};
use crate::const_prop::CanConstProp;
use crate::const_prop::ConstPropMode;
use crate::dataflow_const_prop::DummyMachine;
use crate::errors::{AssertLint, AssertLintKind};
use crate::MirLint;
@ -849,3 +846,128 @@ impl<'tcx> Visitor<'tcx> for ConstPropagator<'_, 'tcx> {
}
}
}
/// The maximum number of bytes that we'll allocate space for a local or the return value.
/// Needed for #66397, because otherwise we eval into large places and that can cause OOM or just
/// Severely regress performance.
const MAX_ALLOC_LIMIT: u64 = 1024;
/// The mode that `ConstProp` is allowed to run in for a given `Local`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum ConstPropMode {
/// The `Local` can be propagated into and reads of this `Local` can also be propagated.
FullConstProp,
/// The `Local` can only be propagated into and from its own block.
OnlyInsideOwnBlock,
/// The `Local` cannot be part of propagation at all. Any statement
/// referencing it either for reading or writing will not get propagated.
NoPropagation,
}
pub struct CanConstProp {
can_const_prop: IndexVec<Local, ConstPropMode>,
// False at the beginning. Once set, no more assignments are allowed to that local.
found_assignment: BitSet<Local>,
}
impl CanConstProp {
/// Returns true if `local` can be propagated
pub fn check<'tcx>(
tcx: TyCtxt<'tcx>,
param_env: ParamEnv<'tcx>,
body: &Body<'tcx>,
) -> IndexVec<Local, ConstPropMode> {
let mut cpv = CanConstProp {
can_const_prop: IndexVec::from_elem(ConstPropMode::FullConstProp, &body.local_decls),
found_assignment: BitSet::new_empty(body.local_decls.len()),
};
for (local, val) in cpv.can_const_prop.iter_enumerated_mut() {
let ty = body.local_decls[local].ty;
match tcx.layout_of(param_env.and(ty)) {
Ok(layout) if layout.size < Size::from_bytes(MAX_ALLOC_LIMIT) => {}
// Either the layout fails to compute, then we can't use this local anyway
// or the local is too large, then we don't want to.
_ => {
*val = ConstPropMode::NoPropagation;
continue;
}
}
}
// Consider that arguments are assigned on entry.
for arg in body.args_iter() {
cpv.found_assignment.insert(arg);
}
cpv.visit_body(body);
cpv.can_const_prop
}
}
impl<'tcx> Visitor<'tcx> for CanConstProp {
fn visit_place(&mut self, place: &Place<'tcx>, mut context: PlaceContext, loc: Location) {
use rustc_middle::mir::visit::PlaceContext::*;
// Dereferencing just read the addess of `place.local`.
if place.projection.first() == Some(&PlaceElem::Deref) {
context = NonMutatingUse(NonMutatingUseContext::Copy);
}
self.visit_local(place.local, context, loc);
self.visit_projection(place.as_ref(), context, loc);
}
fn visit_local(&mut self, local: Local, context: PlaceContext, _: Location) {
use rustc_middle::mir::visit::PlaceContext::*;
match context {
// These are just stores, where the storing is not propagatable, but there may be later
// mutations of the same local via `Store`
| MutatingUse(MutatingUseContext::Call)
| MutatingUse(MutatingUseContext::AsmOutput)
| MutatingUse(MutatingUseContext::Deinit)
// Actual store that can possibly even propagate a value
| MutatingUse(MutatingUseContext::Store)
| MutatingUse(MutatingUseContext::SetDiscriminant) => {
if !self.found_assignment.insert(local) {
match &mut self.can_const_prop[local] {
// If the local can only get propagated in its own block, then we don't have
// to worry about multiple assignments, as we'll nuke the const state at the
// end of the block anyway, and inside the block we overwrite previous
// states as applicable.
ConstPropMode::OnlyInsideOwnBlock => {}
ConstPropMode::NoPropagation => {}
other @ ConstPropMode::FullConstProp => {
trace!(
"local {:?} can't be propagated because of multiple assignments. Previous state: {:?}",
local, other,
);
*other = ConstPropMode::OnlyInsideOwnBlock;
}
}
}
}
// Reading constants is allowed an arbitrary number of times
NonMutatingUse(NonMutatingUseContext::Copy)
| NonMutatingUse(NonMutatingUseContext::Move)
| NonMutatingUse(NonMutatingUseContext::Inspect)
| NonMutatingUse(NonMutatingUseContext::PlaceMention)
| NonUse(_) => {}
// These could be propagated with a smarter analysis or just some careful thinking about
// whether they'd be fine right now.
MutatingUse(MutatingUseContext::Yield)
| MutatingUse(MutatingUseContext::Drop)
| MutatingUse(MutatingUseContext::Retag)
// These can't ever be propagated under any scheme, as we can't reason about indirect
// mutation.
| NonMutatingUse(NonMutatingUseContext::SharedBorrow)
| NonMutatingUse(NonMutatingUseContext::FakeBorrow)
| NonMutatingUse(NonMutatingUseContext::AddressOf)
| MutatingUse(MutatingUseContext::Borrow)
| MutatingUse(MutatingUseContext::AddressOf) => {
trace!("local {:?} can't be propagated because it's used: {:?}", local, context);
self.can_const_prop[local] = ConstPropMode::NoPropagation;
}
MutatingUse(MutatingUseContext::Projection)
| NonMutatingUse(NonMutatingUseContext::Projection) => bug!("visit_place should not pass {context:?} for {local:?}"),
}
}
}

View File

@ -21,7 +21,32 @@ use rustc_span::def_id::DefId;
use rustc_span::DUMMY_SP;
use rustc_target::abi::{Abi, FieldIdx, Size, VariantIdx, FIRST_VARIANT};
use crate::const_prop::throw_machine_stop_str;
/// Macro for machine-specific `InterpError` without allocation.
/// (These will never be shown to the user, but they help diagnose ICEs.)
pub(crate) macro throw_machine_stop_str($($tt:tt)*) {{
// We make a new local type for it. The type itself does not carry any information,
// but its vtable (for the `MachineStopType` trait) does.
#[derive(Debug)]
struct Zst;
// Printing this type shows the desired string.
impl std::fmt::Display for Zst {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, $($tt)*)
}
}
impl rustc_middle::mir::interpret::MachineStopType for Zst {
fn diagnostic_message(&self) -> rustc_errors::DiagnosticMessage {
self.to_string().into()
}
fn add_args(
self: Box<Self>,
_: &mut dyn FnMut(rustc_errors::DiagnosticArgName, rustc_errors::DiagnosticArgValue),
) {}
}
throw_machine_stop!(Zst)
}}
// These constants are somewhat random guesses and have not been optimized.
// If `tcx.sess.mir_opt_level() >= 4`, we ignore the limits (this can become very expensive).

View File

@ -59,7 +59,6 @@ mod remove_place_mention;
mod add_subtyping_projections;
pub mod cleanup_post_borrowck;
mod const_debuginfo;
mod const_prop;
mod const_prop_lint;
mod copy_prop;
mod coroutine;

View File

@ -829,7 +829,7 @@
# target triples containing `-none`, `nvptx`, `switch`, or `-uefi`.
#no-std = <platform-specific> (bool)
# This is an array of the codegen backends that will be compiled a rustc
# This is an array of the codegen backends that will be
# compiled for this target, overriding the global rust.codegen-backends option.
# See that option for more info.
#codegen-backends = rust.codegen-backends (array)

View File

@ -261,7 +261,7 @@ impl<T, A: Allocator> RawVec<T, A> {
// and could hypothetically handle differences between stride and size, but this memory
// has already been allocated so we know it can't overflow and currently rust does not
// support such types. So we can do better by skipping some checks and avoid an unwrap.
let _: () = const { assert!(mem::size_of::<T>() % mem::align_of::<T>() == 0) };
const { assert!(mem::size_of::<T>() % mem::align_of::<T>() == 0) };
unsafe {
let align = mem::align_of::<T>();
let size = mem::size_of::<T>().unchecked_mul(self.cap.0);
@ -465,7 +465,7 @@ impl<T, A: Allocator> RawVec<T, A> {
let (ptr, layout) = if let Some(mem) = self.current_memory() { mem } else { return Ok(()) };
// See current_memory() why this assert is here
let _: () = const { assert!(mem::size_of::<T>() % mem::align_of::<T>() == 0) };
const { assert!(mem::size_of::<T>() % mem::align_of::<T>() == 0) };
// If shrinking to 0, deallocate the buffer. We don't reach this point
// for the T::IS_ZST case since current_memory() will have returned

View File

@ -78,7 +78,7 @@ pub fn current_dir() -> io::Result<PathBuf> {
/// assert!(env::set_current_dir(&root).is_ok());
/// println!("Successfully changed working directory to {}!", root.display());
/// ```
#[doc(alias = "chdir")]
#[doc(alias = "chdir", alias = "SetCurrentDirectory", alias = "SetCurrentDirectoryW")]
#[stable(feature = "env", since = "1.0.0")]
pub fn set_current_dir<P: AsRef<Path>>(path: P) -> io::Result<()> {
os_imp::chdir(path.as_ref())
@ -655,6 +655,7 @@ pub fn home_dir() -> Option<PathBuf> {
/// }
/// ```
#[must_use]
#[doc(alias = "GetTempPath", alias = "GetTempPath2")]
#[stable(feature = "env", since = "1.0.0")]
pub fn temp_dir() -> PathBuf {
os_imp::temp_dir()

View File

@ -127,6 +127,11 @@
//! trait, which provides a [`from_wide`] method to convert a native Windows
//! string (without the terminating nul character) to an [`OsString`].
//!
//! ## Other platforms
//!
//! Many other platforms provide their own extension traits in a
//! `std::os::*::ffi` module.
//!
//! ## On all platforms
//!
//! On all platforms, [`OsStr`] consists of a sequence of bytes that is encoded as a superset of
@ -135,6 +140,8 @@
//! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_encoded_bytes`] and
//! [`OsStr::from_encoded_bytes_unchecked`].
//!
//! For basic string processing, see [`OsStr::slice_encoded_bytes`].
//!
//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
//! [`env::set_var()`]: crate::env::set_var "env::set_var"

View File

@ -9,7 +9,7 @@ use crate::hash::{Hash, Hasher};
use crate::ops::{self, Range};
use crate::rc::Rc;
use crate::slice;
use crate::str::{from_utf8 as str_from_utf8, FromStr};
use crate::str::FromStr;
use crate::sync::Arc;
use crate::sys::os_str::{Buf, Slice};
@ -997,42 +997,15 @@ impl OsStr {
/// ```
#[unstable(feature = "os_str_slice", issue = "118485")]
pub fn slice_encoded_bytes<R: ops::RangeBounds<usize>>(&self, range: R) -> &Self {
#[track_caller]
fn check_valid_boundary(bytes: &[u8], index: usize) {
if index == 0 || index == bytes.len() {
return;
}
// Fast path
if bytes[index - 1].is_ascii() || bytes[index].is_ascii() {
return;
}
let (before, after) = bytes.split_at(index);
// UTF-8 takes at most 4 bytes per codepoint, so we don't
// need to check more than that.
let after = after.get(..4).unwrap_or(after);
match str_from_utf8(after) {
Ok(_) => return,
Err(err) if err.valid_up_to() != 0 => return,
Err(_) => (),
}
for len in 2..=4.min(index) {
let before = &before[index - len..];
if str_from_utf8(before).is_ok() {
return;
}
}
panic!("byte index {index} is not an OsStr boundary");
}
let encoded_bytes = self.as_encoded_bytes();
let Range { start, end } = slice::range(range, ..encoded_bytes.len());
check_valid_boundary(encoded_bytes, start);
check_valid_boundary(encoded_bytes, end);
// `check_public_boundary` should panic if the index does not lie on an
// `OsStr` boundary as described above. It's possible to do this in an
// encoding-agnostic way, but details of the internal encoding might
// permit a more efficient implementation.
self.inner.check_public_boundary(start);
self.inner.check_public_boundary(end);
// SAFETY: `slice::range` ensures that `start` and `end` are valid
let slice = unsafe { encoded_bytes.get_unchecked(start..end) };

View File

@ -194,15 +194,65 @@ fn slice_encoded_bytes() {
}
#[test]
#[should_panic(expected = "byte index 2 is not an OsStr boundary")]
#[should_panic]
fn slice_out_of_bounds() {
let crab = OsStr::new("🦀");
let _ = crab.slice_encoded_bytes(..5);
}
#[test]
#[should_panic]
fn slice_mid_char() {
let crab = OsStr::new("🦀");
let _ = crab.slice_encoded_bytes(..2);
}
#[cfg(unix)]
#[test]
#[should_panic(expected = "byte index 1 is not an OsStr boundary")]
fn slice_invalid_data() {
use crate::os::unix::ffi::OsStrExt;
let os_string = OsStr::from_bytes(b"\xFF\xFF");
let _ = os_string.slice_encoded_bytes(1..);
}
#[cfg(unix)]
#[test]
#[should_panic(expected = "byte index 1 is not an OsStr boundary")]
fn slice_partial_utf8() {
use crate::os::unix::ffi::{OsStrExt, OsStringExt};
let part_crab = OsStr::from_bytes(&"🦀".as_bytes()[..3]);
let mut os_string = OsString::from_vec(vec![0xFF]);
os_string.push(part_crab);
let _ = os_string.slice_encoded_bytes(1..);
}
#[cfg(unix)]
#[test]
fn slice_invalid_edge() {
use crate::os::unix::ffi::{OsStrExt, OsStringExt};
let os_string = OsStr::from_bytes(b"a\xFFa");
assert_eq!(os_string.slice_encoded_bytes(..1), "a");
assert_eq!(os_string.slice_encoded_bytes(1..), OsStr::from_bytes(b"\xFFa"));
assert_eq!(os_string.slice_encoded_bytes(..2), OsStr::from_bytes(b"a\xFF"));
assert_eq!(os_string.slice_encoded_bytes(2..), "a");
let os_string = OsStr::from_bytes(&"abc🦀".as_bytes()[..6]);
assert_eq!(os_string.slice_encoded_bytes(..3), "abc");
assert_eq!(os_string.slice_encoded_bytes(3..), OsStr::from_bytes(b"\xF0\x9F\xA6"));
let mut os_string = OsString::from_vec(vec![0xFF]);
os_string.push("🦀");
assert_eq!(os_string.slice_encoded_bytes(..1), OsStr::from_bytes(b"\xFF"));
assert_eq!(os_string.slice_encoded_bytes(1..), "🦀");
}
#[cfg(windows)]
#[test]
#[should_panic(expected = "byte index 3 is not an OsStr boundary")]
#[should_panic(expected = "byte index 3 lies between surrogate codepoints")]
fn slice_between_surrogates() {
use crate::os::windows::ffi::OsStringExt;
@ -216,10 +266,14 @@ fn slice_between_surrogates() {
fn slice_surrogate_edge() {
use crate::os::windows::ffi::OsStringExt;
let os_string = OsString::from_wide(&[0xD800]);
let mut with_crab = os_string.clone();
with_crab.push("🦀");
let surrogate = OsString::from_wide(&[0xD800]);
let mut pre_crab = surrogate.clone();
pre_crab.push("🦀");
assert_eq!(pre_crab.slice_encoded_bytes(..3), surrogate);
assert_eq!(pre_crab.slice_encoded_bytes(3..), "🦀");
assert_eq!(with_crab.slice_encoded_bytes(..3), os_string);
assert_eq!(with_crab.slice_encoded_bytes(3..), "🦀");
let mut post_crab = OsString::from("🦀");
post_crab.push(&surrogate);
assert_eq!(post_crab.slice_encoded_bytes(..4), "🦀");
assert_eq!(post_crab.slice_encoded_bytes(4..), surrogate);
}

View File

@ -656,6 +656,7 @@ impl File {
///
/// Note that this method alters the permissions of the underlying file,
/// even though it takes `&self` rather than `&mut self`.
#[doc(alias = "fchmod", alias = "SetFileInformationByHandle")]
#[stable(feature = "set_permissions_atomic", since = "1.16.0")]
pub fn set_permissions(&self, perm: Permissions) -> io::Result<()> {
self.inner.set_permissions(perm.0)
@ -1314,6 +1315,7 @@ impl Metadata {
/// Ok(())
/// }
/// ```
#[doc(alias = "mtime", alias = "ftLastWriteTime")]
#[stable(feature = "fs_time", since = "1.10.0")]
pub fn modified(&self) -> io::Result<SystemTime> {
self.0.modified().map(FromInner::from_inner)
@ -1349,6 +1351,7 @@ impl Metadata {
/// Ok(())
/// }
/// ```
#[doc(alias = "atime", alias = "ftLastAccessTime")]
#[stable(feature = "fs_time", since = "1.10.0")]
pub fn accessed(&self) -> io::Result<SystemTime> {
self.0.accessed().map(FromInner::from_inner)
@ -1381,6 +1384,7 @@ impl Metadata {
/// Ok(())
/// }
/// ```
#[doc(alias = "btime", alias = "birthtime", alias = "ftCreationTime")]
#[stable(feature = "fs_time", since = "1.10.0")]
pub fn created(&self) -> io::Result<SystemTime> {
self.0.created().map(FromInner::from_inner)
@ -1879,6 +1883,7 @@ impl AsInner<fs_imp::DirEntry> for DirEntry {
/// Ok(())
/// }
/// ```
#[doc(alias = "rm", alias = "unlink", alias = "DeleteFile")]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn remove_file<P: AsRef<Path>>(path: P) -> io::Result<()> {
fs_imp::unlink(path.as_ref())
@ -1917,6 +1922,7 @@ pub fn remove_file<P: AsRef<Path>>(path: P) -> io::Result<()> {
/// Ok(())
/// }
/// ```
#[doc(alias = "stat")]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn metadata<P: AsRef<Path>>(path: P) -> io::Result<Metadata> {
fs_imp::stat(path.as_ref()).map(Metadata)
@ -1951,6 +1957,7 @@ pub fn metadata<P: AsRef<Path>>(path: P) -> io::Result<Metadata> {
/// Ok(())
/// }
/// ```
#[doc(alias = "lstat")]
#[stable(feature = "symlink_metadata", since = "1.1.0")]
pub fn symlink_metadata<P: AsRef<Path>>(path: P) -> io::Result<Metadata> {
fs_imp::lstat(path.as_ref()).map(Metadata)
@ -1994,6 +2001,7 @@ pub fn symlink_metadata<P: AsRef<Path>>(path: P) -> io::Result<Metadata> {
/// Ok(())
/// }
/// ```
#[doc(alias = "mv", alias = "MoveFile", alias = "MoveFileEx")]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn rename<P: AsRef<Path>, Q: AsRef<Path>>(from: P, to: Q) -> io::Result<()> {
fs_imp::rename(from.as_ref(), to.as_ref())
@ -2052,6 +2060,9 @@ pub fn rename<P: AsRef<Path>, Q: AsRef<Path>>(from: P, to: Q) -> io::Result<()>
/// Ok(())
/// }
/// ```
#[doc(alias = "cp")]
#[doc(alias = "CopyFile", alias = "CopyFileEx")]
#[doc(alias = "fclonefileat", alias = "fcopyfile")]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn copy<P: AsRef<Path>, Q: AsRef<Path>>(from: P, to: Q) -> io::Result<u64> {
fs_imp::copy(from.as_ref(), to.as_ref())
@ -2096,6 +2107,7 @@ pub fn copy<P: AsRef<Path>, Q: AsRef<Path>>(from: P, to: Q) -> io::Result<u64> {
/// Ok(())
/// }
/// ```
#[doc(alias = "CreateHardLink", alias = "linkat")]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn hard_link<P: AsRef<Path>, Q: AsRef<Path>>(original: P, link: Q) -> io::Result<()> {
fs_imp::link(original.as_ref(), link.as_ref())
@ -2245,7 +2257,7 @@ pub fn canonicalize<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {
/// Ok(())
/// }
/// ```
#[doc(alias = "mkdir")]
#[doc(alias = "mkdir", alias = "CreateDirectory")]
#[stable(feature = "rust1", since = "1.0.0")]
#[cfg_attr(not(test), rustc_diagnostic_item = "fs_create_dir")]
pub fn create_dir<P: AsRef<Path>>(path: P) -> io::Result<()> {
@ -2326,7 +2338,7 @@ pub fn create_dir_all<P: AsRef<Path>>(path: P) -> io::Result<()> {
/// Ok(())
/// }
/// ```
#[doc(alias = "rmdir")]
#[doc(alias = "rmdir", alias = "RemoveDirectory")]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn remove_dir<P: AsRef<Path>>(path: P) -> io::Result<()> {
fs_imp::rmdir(path.as_ref())
@ -2449,6 +2461,7 @@ pub fn remove_dir_all<P: AsRef<Path>>(path: P) -> io::Result<()> {
/// Ok(())
/// }
/// ```
#[doc(alias = "ls", alias = "opendir", alias = "FindFirstFile", alias = "FindNextFile")]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn read_dir<P: AsRef<Path>>(path: P) -> io::Result<ReadDir> {
fs_imp::readdir(path.as_ref()).map(ReadDir)
@ -2484,6 +2497,7 @@ pub fn read_dir<P: AsRef<Path>>(path: P) -> io::Result<ReadDir> {
/// Ok(())
/// }
/// ```
#[doc(alias = "chmod", alias = "SetFileAttributes")]
#[stable(feature = "set_permissions", since = "1.1.0")]
pub fn set_permissions<P: AsRef<Path>>(path: P, perm: Permissions) -> io::Result<()> {
fs_imp::set_perm(path.as_ref(), perm.0)

View File

@ -173,51 +173,61 @@ pub trait FileExt {
///
/// This corresponds to the `fd_tell` syscall and is similar to
/// `seek` where you offset 0 bytes from the current position.
#[doc(alias = "fd_tell")]
fn tell(&self) -> io::Result<u64>;
/// Adjust the flags associated with this file.
///
/// This corresponds to the `fd_fdstat_set_flags` syscall.
#[doc(alias = "fd_fdstat_set_flags")]
fn fdstat_set_flags(&self, flags: u16) -> io::Result<()>;
/// Adjust the rights associated with this file.
///
/// This corresponds to the `fd_fdstat_set_rights` syscall.
#[doc(alias = "fd_fdstat_set_rights")]
fn fdstat_set_rights(&self, rights: u64, inheriting: u64) -> io::Result<()>;
/// Provide file advisory information on a file descriptor.
///
/// This corresponds to the `fd_advise` syscall.
#[doc(alias = "fd_advise")]
fn advise(&self, offset: u64, len: u64, advice: u8) -> io::Result<()>;
/// Force the allocation of space in a file.
///
/// This corresponds to the `fd_allocate` syscall.
#[doc(alias = "fd_allocate")]
fn allocate(&self, offset: u64, len: u64) -> io::Result<()>;
/// Create a directory.
///
/// This corresponds to the `path_create_directory` syscall.
#[doc(alias = "path_create_directory")]
fn create_directory<P: AsRef<Path>>(&self, dir: P) -> io::Result<()>;
/// Read the contents of a symbolic link.
///
/// This corresponds to the `path_readlink` syscall.
#[doc(alias = "path_readlink")]
fn read_link<P: AsRef<Path>>(&self, path: P) -> io::Result<PathBuf>;
/// Return the attributes of a file or directory.
///
/// This corresponds to the `path_filestat_get` syscall.
#[doc(alias = "path_filestat_get")]
fn metadata_at<P: AsRef<Path>>(&self, lookup_flags: u32, path: P) -> io::Result<Metadata>;
/// Unlink a file.
///
/// This corresponds to the `path_unlink_file` syscall.
#[doc(alias = "path_unlink_file")]
fn remove_file<P: AsRef<Path>>(&self, path: P) -> io::Result<()>;
/// Remove a directory.
///
/// This corresponds to the `path_remove_directory` syscall.
#[doc(alias = "path_remove_directory")]
fn remove_directory<P: AsRef<Path>>(&self, path: P) -> io::Result<()>;
}
@ -359,6 +369,7 @@ pub trait OpenOptionsExt {
/// Open a file or directory.
///
/// This corresponds to the `path_open` syscall.
#[doc(alias = "path_open")]
fn open_at<P: AsRef<Path>>(&self, file: &File, path: P) -> io::Result<File>;
}
@ -500,6 +511,7 @@ impl DirEntryExt for fs::DirEntry {
/// Create a hard link.
///
/// This corresponds to the `path_link` syscall.
#[doc(alias = "path_link")]
pub fn link<P: AsRef<Path>, U: AsRef<Path>>(
old_fd: &File,
old_flags: u32,
@ -518,6 +530,7 @@ pub fn link<P: AsRef<Path>, U: AsRef<Path>>(
/// Rename a file or directory.
///
/// This corresponds to the `path_rename` syscall.
#[doc(alias = "path_rename")]
pub fn rename<P: AsRef<Path>, U: AsRef<Path>>(
old_fd: &File,
old_path: P,
@ -534,6 +547,7 @@ pub fn rename<P: AsRef<Path>, U: AsRef<Path>>(
/// Create a symbolic link.
///
/// This corresponds to the `path_symlink` syscall.
#[doc(alias = "path_symlink")]
pub fn symlink<P: AsRef<Path>, U: AsRef<Path>>(
old_path: P,
fd: &File,

View File

@ -211,6 +211,49 @@ impl Slice {
unsafe { mem::transmute(s) }
}
#[track_caller]
#[inline]
pub fn check_public_boundary(&self, index: usize) {
if index == 0 || index == self.inner.len() {
return;
}
if index < self.inner.len()
&& (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii())
{
return;
}
slow_path(&self.inner, index);
/// We're betting that typical splits will involve an ASCII character.
///
/// Putting the expensive checks in a separate function generates notably
/// better assembly.
#[track_caller]
#[inline(never)]
fn slow_path(bytes: &[u8], index: usize) {
let (before, after) = bytes.split_at(index);
// UTF-8 takes at most 4 bytes per codepoint, so we don't
// need to check more than that.
let after = after.get(..4).unwrap_or(after);
match str::from_utf8(after) {
Ok(_) => return,
Err(err) if err.valid_up_to() != 0 => return,
Err(_) => (),
}
for len in 2..=4.min(index) {
let before = &before[index - len..];
if str::from_utf8(before).is_ok() {
return;
}
}
panic!("byte index {index} is not an OsStr boundary");
}
}
#[inline]
pub fn from_str(s: &str) -> &Slice {
unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) }

View File

@ -6,7 +6,7 @@ use crate::fmt;
use crate::mem;
use crate::rc::Rc;
use crate::sync::Arc;
use crate::sys_common::wtf8::{Wtf8, Wtf8Buf};
use crate::sys_common::wtf8::{check_utf8_boundary, Wtf8, Wtf8Buf};
use crate::sys_common::{AsInner, FromInner, IntoInner};
#[derive(Clone, Hash)]
@ -171,6 +171,11 @@ impl Slice {
mem::transmute(Wtf8::from_bytes_unchecked(s))
}
#[track_caller]
pub fn check_public_boundary(&self, index: usize) {
check_utf8_boundary(&self.inner, index);
}
#[inline]
pub fn from_str(s: &str) -> &Slice {
unsafe { mem::transmute(Wtf8::from_str(s)) }

View File

@ -885,15 +885,43 @@ fn decode_surrogate_pair(lead: u16, trail: u16) -> char {
unsafe { char::from_u32_unchecked(code_point) }
}
/// Copied from core::str::StrPrelude::is_char_boundary
/// Copied from str::is_char_boundary
#[inline]
pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool {
if index == slice.len() {
if index == 0 {
return true;
}
match slice.bytes.get(index) {
None => false,
Some(&b) => b < 128 || b >= 192,
None => index == slice.len(),
Some(&b) => (b as i8) >= -0x40,
}
}
/// Verify that `index` is at the edge of either a valid UTF-8 codepoint
/// (i.e. a codepoint that's not a surrogate) or of the whole string.
///
/// These are the cases currently permitted by `OsStr::slice_encoded_bytes`.
/// Splitting between surrogates is valid as far as WTF-8 is concerned, but
/// we do not permit it in the public API because WTF-8 is considered an
/// implementation detail.
#[track_caller]
#[inline]
pub fn check_utf8_boundary(slice: &Wtf8, index: usize) {
if index == 0 {
return;
}
match slice.bytes.get(index) {
Some(0xED) => (), // Might be a surrogate
Some(&b) if (b as i8) >= -0x40 => return,
Some(_) => panic!("byte index {index} is not a codepoint boundary"),
None if index == slice.len() => return,
None => panic!("byte index {index} is out of bounds"),
}
if slice.bytes[index + 1] >= 0xA0 {
// There's a surrogate after index. Now check before index.
if index >= 3 && slice.bytes[index - 3] == 0xED && slice.bytes[index - 2] >= 0xA0 {
panic!("byte index {index} lies between surrogate codepoints");
}
}
}

View File

@ -663,3 +663,65 @@ fn wtf8_to_owned() {
assert_eq!(string.bytes, b"\xED\xA0\x80");
assert!(!string.is_known_utf8);
}
#[test]
fn wtf8_valid_utf8_boundaries() {
let mut string = Wtf8Buf::from_str("aé 💩");
string.push(CodePoint::from_u32(0xD800).unwrap());
string.push(CodePoint::from_u32(0xD800).unwrap());
check_utf8_boundary(&string, 0);
check_utf8_boundary(&string, 1);
check_utf8_boundary(&string, 3);
check_utf8_boundary(&string, 4);
check_utf8_boundary(&string, 8);
check_utf8_boundary(&string, 14);
assert_eq!(string.len(), 14);
string.push_char('a');
check_utf8_boundary(&string, 14);
check_utf8_boundary(&string, 15);
let mut string = Wtf8Buf::from_str("a");
string.push(CodePoint::from_u32(0xD800).unwrap());
check_utf8_boundary(&string, 1);
let mut string = Wtf8Buf::from_str("\u{D7FF}");
string.push(CodePoint::from_u32(0xD800).unwrap());
check_utf8_boundary(&string, 3);
let mut string = Wtf8Buf::new();
string.push(CodePoint::from_u32(0xD800).unwrap());
string.push_char('\u{D7FF}');
check_utf8_boundary(&string, 3);
}
#[test]
#[should_panic(expected = "byte index 4 is out of bounds")]
fn wtf8_utf8_boundary_out_of_bounds() {
let string = Wtf8::from_str("");
check_utf8_boundary(&string, 4);
}
#[test]
#[should_panic(expected = "byte index 1 is not a codepoint boundary")]
fn wtf8_utf8_boundary_inside_codepoint() {
let string = Wtf8::from_str("é");
check_utf8_boundary(&string, 1);
}
#[test]
#[should_panic(expected = "byte index 1 is not a codepoint boundary")]
fn wtf8_utf8_boundary_inside_surrogate() {
let mut string = Wtf8Buf::new();
string.push(CodePoint::from_u32(0xD800).unwrap());
check_utf8_boundary(&string, 1);
}
#[test]
#[should_panic(expected = "byte index 3 lies between surrogate codepoints")]
fn wtf8_utf8_boundary_between_surrogates() {
let mut string = Wtf8Buf::new();
string.push(CodePoint::from_u32(0xD800).unwrap());
string.push(CodePoint::from_u32(0xD800).unwrap());
check_utf8_boundary(&string, 3);
}

View File

@ -3,6 +3,8 @@ error[E0093]: unrecognized intrinsic function: `foo`
|
LL | fn foo();
| ^^^^^^^^^ unrecognized intrinsic
|
= help: if you're adding an intrinsic, be sure to update `check_intrinsic_type`
error: aborting due to 1 previous error

View File

@ -187,12 +187,16 @@ error[E0093]: unrecognized intrinsic function: `f1`
|
LL | extern "rust-intrinsic" fn f1() {}
| ^^ unrecognized intrinsic
|
= help: if you're adding an intrinsic, be sure to update `check_intrinsic_type`
error[E0093]: unrecognized intrinsic function: `f2`
--> $DIR/feature-gate-abi.rs:18:32
|
LL | extern "platform-intrinsic" fn f2() {}
| ^^ unrecognized intrinsic
|
= help: if you're adding an intrinsic, be sure to update `check_intrinsic_type`
error: intrinsic must be in `extern "rust-intrinsic" { ... }` block
--> $DIR/feature-gate-abi.rs:25:32

View File

@ -21,12 +21,16 @@ error[E0093]: unrecognized intrinsic function: `bar`
|
LL | fn bar();
| ^^^^^^^^^ unrecognized intrinsic
|
= help: if you're adding an intrinsic, be sure to update `check_intrinsic_type`
error[E0093]: unrecognized intrinsic function: `baz`
--> $DIR/feature-gate-intrinsics.rs:5:28
|
LL | extern "rust-intrinsic" fn baz() {}
| ^^^ unrecognized intrinsic
|
= help: if you're adding an intrinsic, be sure to update `check_intrinsic_type`
error: intrinsic must be in `extern "rust-intrinsic" { ... }` block
--> $DIR/feature-gate-intrinsics.rs:5:34

View File

@ -9,6 +9,8 @@ error[E0093]: unrecognized intrinsic function: `hello`
|
LL | extern "rust-intrinsic" fn hello() {
| ^^^^^ unrecognized intrinsic
|
= help: if you're adding an intrinsic, be sure to update `check_intrinsic_type`
error: intrinsic must be in `extern "rust-intrinsic" { ... }` block
--> $DIR/intrinsics-always-extern.rs:8:43

View File

@ -0,0 +1,10 @@
macro_rules! test {
($wrong:id) => {};
} //~^ ERROR: invalid fragment specifier `id`
// guard against breaking raw identifier diagnostic
macro_rules! test_raw_identifer {
($wrong:r#if) => {};
} //~^ ERROR: invalid fragment specifier `r#if`
fn main() {}

View File

@ -0,0 +1,18 @@
error: invalid fragment specifier `id`
--> $DIR/invalid-fragment-specifier.rs:2:6
|
LL | ($wrong:id) => {};
| ^^^^^^^^^
|
= help: valid fragment specifiers are `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, `literal`, `path`, `meta`, `tt`, `item` and `vis`
error: invalid fragment specifier `r#if`
--> $DIR/invalid-fragment-specifier.rs:7:6
|
LL | ($wrong:r#if) => {};
| ^^^^^^^^^^^
|
= help: valid fragment specifiers are `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, `literal`, `path`, `meta`, `tt`, `item` and `vis`
error: aborting due to 2 previous errors

View File

@ -1,8 +0,0 @@
macro_rules! foo(
($x:foo) => ()
//~^ ERROR invalid fragment specifier
);
fn main() {
foo!(foo);
}

View File

@ -1,10 +0,0 @@
error: invalid fragment specifier `foo`
--> $DIR/macro-invalid-fragment-spec.rs:2:6
|
LL | ($x:foo) => ()
| ^^^^^^
|
= help: valid fragment specifiers are `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, `literal`, `path`, `meta`, `tt`, `item` and `vis`
error: aborting due to 1 previous error