Rollup merge of #132849 - RalfJung:miri-sync, r=RalfJung

Miri subtree update

r? `@ghost`
Matthias Krüger 2024-11-10 17:43:07 +01:00 committed by GitHub
commit 2681dcb189
52 changed files with 960 additions and 375 deletions

View File

@ -58,11 +58,20 @@ jobs:
- name: rustdoc
run: RUSTDOCFLAGS="-Dwarnings" ./miri doc --document-private-items
coverage:
name: coverage report
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/workflows/setup
- name: coverage
run: ./miri test --coverage
# Summary job for the merge queue.
# ALL THE PREVIOUS JOBS NEED TO BE ADDED TO THE `needs` SECTION OF THIS JOB!
# And they should be added below in `cron-fail-notify` as well.
conclusion:
needs: [build, style]
needs: [build, style, coverage]
# We need to ensure this job does *not* get skipped if its dependencies fail,
# because a skipped job is considered a success by GitHub. So we have to
# overwrite `if:`. We use `!cancelled()` to ensure the job does still not get run
@ -86,7 +95,7 @@ jobs:
contents: write
# ... and create a PR.
pull-requests: write
needs: [build, style]
needs: [build, style, coverage]
if: ${{ github.event_name == 'schedule' && failure() }}
steps:
# Send a Zulip notification

View File

@ -13,6 +13,25 @@ for a list of Miri maintainers.
[Rust Zulip]: https://rust-lang.zulipchat.com
### Pull review process
When you get a review, please take care of the requested changes in new commits. Do not amend
existing commits. Generally avoid force-pushing. The only time you should force push is when there
is a conflict with the master branch (in that case you should rebase across master, not merge), and
all the way at the end of the review process when the reviewer tells you that the PR is done and you
should squash the commits. For the latter case, use `git rebase --keep-base ...` to squash without
changing the base commit your PR branches off of. Use your own judgment and the reviewer's guidance
to decide whether the PR should be squashed into a single commit or multiple logically separate
commits. (All this is to work around the fact that Github is quite bad at dealing with force pushes
and does not support `git range-diff`. Maybe one day Github will be good at git and then life can
become easier.)
Most PRs bounce back and forth between the reviewer and the author several times, so it is good to
keep track of who is expected to take the next step. We are using the `S-waiting-for-review` and
`S-waiting-for-author` labels for that. If a reviewer asked you to do some changes and you think
they are all taken care of, post a comment saying `@rustbot ready` to mark a PR as ready for the
next round of review.
### Larger-scale contributions
If you are thinking about making a larger-scale contribution -- in particular anything that needs
@ -45,14 +64,6 @@ process for such contributions:
This process is largely informal, and its primary goal is to more clearly communicate expectations.
Please get in touch with us if you have any questions!
### Managing the review state
Most PRs bounce back and forth between the reviewer and the author several times, so it is good to
keep track of who is expected to take the next step. We are using the `S-waiting-for-review` and
`S-waiting-for-author` labels for that. If a reviewer asked you to do some changes and you think
they are all taken care of, post a comment saying `@rustbot ready` to mark a PR as ready for the
next round of review.
## Preparing the build environment
Miri heavily relies on internal and unstable rustc interfaces to execute MIR,

View File

@ -154,7 +154,7 @@ case $HOST_TARGET in
TEST_TARGET=i686-unknown-freebsd run_tests_minimal $BASIC $UNIX time hashmap random threadname pthread fs libc-pipe
TEST_TARGET=x86_64-unknown-illumos run_tests_minimal $BASIC $UNIX time hashmap random thread sync available-parallelism tls libc-pipe
TEST_TARGET=x86_64-pc-solaris run_tests_minimal $BASIC $UNIX time hashmap random thread sync available-parallelism tls libc-pipe
TEST_TARGET=aarch64-linux-android run_tests_minimal $BASIC $UNIX time hashmap threadname pthread
TEST_TARGET=aarch64-linux-android run_tests_minimal $BASIC $UNIX time hashmap random sync threadname pthread
TEST_TARGET=wasm32-wasip2 run_tests_minimal $BASIC wasm
TEST_TARGET=wasm32-unknown-unknown run_tests_minimal no_std empty_main wasm # this target doesn't really have std
TEST_TARGET=thumbv7em-none-eabihf run_tests_minimal no_std

View File

@ -1 +1 @@
arithmetic-side-effects-allowed = ["rustc_abi::Size"]
arithmetic-side-effects-allowed = ["rustc_abi::Size", "rustc_apfloat::ieee::IeeeFloat"]

View File

@ -63,6 +63,12 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "fastrand"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
[[package]]
name = "getrandom"
version = "0.2.12"
@ -100,9 +106,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "libc"
version = "0.2.153"
version = "0.2.159"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5"
[[package]]
name = "libredox"
@ -138,11 +144,18 @@ dependencies = [
"rustc_version",
"serde_json",
"shell-words",
"tempfile",
"walkdir",
"which",
"xshell",
]
[[package]]
name = "once_cell"
version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "option-ext"
version = "0.2.0"
@ -195,9 +208,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.34"
version = "0.38.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
dependencies = [
"bitflags",
"errno",
@ -276,6 +289,19 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tempfile"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b"
dependencies = [
"cfg-if",
"fastrand",
"once_cell",
"rustix",
"windows-sys 0.59.0",
]
[[package]]
name = "thiserror"
version = "1.0.57"
@ -357,6 +383,15 @@ dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-targets"
version = "0.48.5"

View File

@ -24,3 +24,4 @@ rustc_version = "0.4"
dunce = "1.0.4"
directories = "5"
serde_json = "1"
tempfile = "3.13.0"

View File

@ -172,7 +172,8 @@ pub fn exec(self) -> Result<()> {
Command::Install { flags } => Self::install(flags),
Command::Build { flags } => Self::build(flags),
Command::Check { flags } => Self::check(flags),
Command::Test { bless, flags, target } => Self::test(bless, flags, target),
Command::Test { bless, flags, target, coverage } =>
Self::test(bless, flags, target, coverage),
Command::Run { dep, verbose, many_seeds, target, edition, flags } =>
Self::run(dep, verbose, many_seeds, target, edition, flags),
Command::Doc { flags } => Self::doc(flags),
@ -458,9 +459,20 @@ fn clippy(flags: Vec<String>) -> Result<()> {
Ok(())
}
fn test(bless: bool, mut flags: Vec<String>, target: Option<String>) -> Result<()> {
fn test(
bless: bool,
mut flags: Vec<String>,
target: Option<String>,
coverage: bool,
) -> Result<()> {
let mut e = MiriEnv::new()?;
let coverage = coverage.then_some(crate::coverage::CoverageReport::new()?);
if let Some(report) = &coverage {
report.add_env_vars(&mut e)?;
}
// Prepare a sysroot. (Also builds cargo-miri, which we need.)
e.build_miri_sysroot(/* quiet */ false, target.as_deref())?;
@ -479,6 +491,11 @@ fn test(bless: bool, mut flags: Vec<String>, target: Option<String>) -> Result<(
// Then test, and let caller control flags.
// Only in root project as `cargo-miri` has no tests.
e.test(".", &flags)?;
if let Some(coverage) = &coverage {
coverage.show_coverage_report(&e)?;
}
Ok(())
}

View File

@ -0,0 +1,91 @@
use std::path::PathBuf;
use anyhow::{Context, Result};
use path_macro::path;
use tempfile::TempDir;
use xshell::cmd;
use crate::util::MiriEnv;
/// CoverageReport can generate code coverage reports for miri.
pub struct CoverageReport {
/// path is a temporary directory where intermediate coverage artifacts will be stored.
/// (The final output will be stored in a permanent location.)
path: TempDir,
}
impl CoverageReport {
/// Creates a new CoverageReport.
///
/// # Errors
///
/// An error will be returned if a temporary directory could not be created.
pub fn new() -> Result<Self> {
Ok(Self { path: TempDir::new()? })
}
/// add_env_vars will add the required environment variables to MiriEnv `e`.
pub fn add_env_vars(&self, e: &mut MiriEnv) -> Result<()> {
let mut rustflags = e.sh.var("RUSTFLAGS")?;
rustflags.push_str(" -C instrument-coverage");
e.sh.set_var("RUSTFLAGS", rustflags);
// Copy-pasting from: https://doc.rust-lang.org/rustc/instrument-coverage.html#instrumentation-based-code-coverage
// The format symbols below have the following meaning:
// - %p - The process ID.
// - %Nm - the instrumented binary's signature:
// The runtime creates a pool of N raw profiles, used for on-line
// profile merging. The runtime takes care of selecting a raw profile
// from the pool, locking it, and updating it before the program
// exits. N must be between 1 and 9, and defaults to 1 if omitted
// (with simply %m).
//
// Additionally the default for LLVM_PROFILE_FILE is default_%m_%p.profraw.
// So we just use the same template, replacing "default" with "miri".
let file_template = self.path.path().join("miri_%m_%p.profraw");
e.sh.set_var("LLVM_PROFILE_FILE", file_template);
Ok(())
}
/// show_coverage_report will print coverage information using the artifact
/// files in `self.path`.
pub fn show_coverage_report(&self, e: &MiriEnv) -> Result<()> {
let profraw_files = self.profraw_files()?;
let profdata_bin = path!(e.libdir / ".." / "bin" / "llvm-profdata");
let merged_file = path!(e.miri_dir / "target" / "coverage.profdata");
// Merge the profraw files
cmd!(e.sh, "{profdata_bin} merge -sparse {profraw_files...} -o {merged_file}")
.quiet()
.run()?;
// Create the coverage report.
let cov_bin = path!(e.libdir / ".." / "bin" / "llvm-cov");
let miri_bin =
e.build_get_binary(".").context("failed to get filename of miri executable")?;
cmd!(
e.sh,
"{cov_bin} report --instr-profile={merged_file} --object {miri_bin} --sources src/"
)
.run()?;
println!("Profile data saved in {}", merged_file.display());
Ok(())
}
/// profraw_files returns the profraw files in `self.path`.
///
/// # Errors
///
/// An error will be returned if `self.path` can't be read.
fn profraw_files(&self) -> Result<Vec<PathBuf>> {
Ok(std::fs::read_dir(&self.path)?
.filter_map(|r| r.ok())
.filter(|e| e.file_type().is_ok_and(|t| t.is_file()))
.map(|e| e.path())
.filter(|p| p.extension().is_some_and(|e| e == "profraw"))
.collect())
}
}

View File

@ -2,6 +2,7 @@
mod args;
mod commands;
mod coverage;
mod util;
use std::ops::Range;
@ -34,6 +35,8 @@ pub enum Command {
/// The cross-interpretation target.
/// If none then the host is the target.
target: Option<String>,
/// Produce coverage report if set.
coverage: bool,
/// Flags that are passed through to the test harness.
flags: Vec<String>,
},
@ -158,9 +161,12 @@ fn main() -> Result<()> {
let mut target = None;
let mut bless = false;
let mut flags = Vec::new();
let mut coverage = false;
loop {
if args.get_long_flag("bless")? {
bless = true;
} else if args.get_long_flag("coverage")? {
coverage = true;
} else if let Some(val) = args.get_long_opt("target")? {
target = Some(val);
} else if let Some(flag) = args.get_other() {
@ -169,7 +175,7 @@ fn main() -> Result<()> {
break;
}
}
Command::Test { bless, flags, target }
Command::Test { bless, flags, target, coverage }
}
Some("run") => {
let mut dep = false;

View File

@ -41,6 +41,8 @@ pub struct MiriEnv {
pub sysroot: PathBuf,
/// The shell we use.
pub sh: Shell,
/// The library dir in the sysroot.
pub libdir: PathBuf,
}
impl MiriEnv {
@ -96,7 +98,8 @@ pub fn new() -> Result<Self> {
// so that Windows can find the DLLs.
if cfg!(windows) {
let old_path = sh.var("PATH")?;
let new_path = env::join_paths(iter::once(libdir).chain(env::split_paths(&old_path)))?;
let new_path =
env::join_paths(iter::once(libdir.clone()).chain(env::split_paths(&old_path)))?;
sh.set_var("PATH", new_path);
}
@ -111,7 +114,7 @@ pub fn new() -> Result<Self> {
std::process::exit(1);
}
Ok(MiriEnv { miri_dir, toolchain, sh, sysroot, cargo_extra_flags })
Ok(MiriEnv { miri_dir, toolchain, sh, sysroot, cargo_extra_flags, libdir })
}
pub fn cargo_cmd(&self, crate_dir: impl AsRef<OsStr>, cmd: &str) -> Cmd<'_> {

View File

@ -1 +1 @@
814df6e50eaf89b90793e7d9618bb60f1f18377a
668959740f97e7a22ae340742886d330ab63950f

View File

@ -111,8 +111,8 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Returns the exposed `AllocId` that corresponds to the specified addr,
// or `None` if the addr is out of bounds
fn alloc_id_from_addr(&self, addr: u64, size: i64) -> Option<AllocId> {
let ecx = self.eval_context_ref();
let global_state = ecx.machine.alloc_addresses.borrow();
let this = self.eval_context_ref();
let global_state = this.machine.alloc_addresses.borrow();
assert!(global_state.provenance_mode != ProvenanceMode::Strict);
// We always search the allocation to the right of this address. So if the size is strictly
@ -134,7 +134,7 @@ fn alloc_id_from_addr(&self, addr: u64, size: i64) -> Option<AllocId> {
// entered for addresses that are not the base address, so even zero-sized
// allocations will get recognized at their base address -- but all other
// allocations will *not* be recognized at their "end" address.
let size = ecx.get_alloc_info(alloc_id).size;
let size = this.get_alloc_info(alloc_id).size;
if offset < size.bytes() { Some(alloc_id) } else { None }
}
}?;
@ -142,7 +142,7 @@ fn alloc_id_from_addr(&self, addr: u64, size: i64) -> Option<AllocId> {
// We only use this provenance if it has been exposed.
if global_state.exposed.contains(&alloc_id) {
// This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
debug_assert!(ecx.is_alloc_live(alloc_id));
debug_assert!(this.is_alloc_live(alloc_id));
Some(alloc_id)
} else {
None
@ -155,9 +155,9 @@ fn addr_from_alloc_id_uncached(
alloc_id: AllocId,
memory_kind: MemoryKind,
) -> InterpResult<'tcx, u64> {
let ecx = self.eval_context_ref();
let mut rng = ecx.machine.rng.borrow_mut();
let info = ecx.get_alloc_info(alloc_id);
let this = self.eval_context_ref();
let mut rng = this.machine.rng.borrow_mut();
let info = this.get_alloc_info(alloc_id);
// This is either called immediately after allocation (and then cached), or when
// adjusting `tcx` pointers (which never get freed). So assert that we are looking
// at a live allocation. This also ensures that we never re-assign an address to an
@ -166,12 +166,12 @@ fn addr_from_alloc_id_uncached(
assert!(!matches!(info.kind, AllocKind::Dead));
// This allocation does not have a base address yet, pick or reuse one.
if ecx.machine.native_lib.is_some() {
if this.machine.native_lib.is_some() {
// In native lib mode, we use the "real" address of the bytes for this allocation.
// This ensures the interpreted program and native code have the same view of memory.
let base_ptr = match info.kind {
AllocKind::LiveData => {
if ecx.tcx.try_get_global_alloc(alloc_id).is_some() {
if this.tcx.try_get_global_alloc(alloc_id).is_some() {
// For new global allocations, we always pre-allocate the memory to be able to use the machine address directly.
let prepared_bytes = MiriAllocBytes::zeroed(info.size, info.align)
.unwrap_or_else(|| {
@ -185,7 +185,7 @@ fn addr_from_alloc_id_uncached(
.unwrap();
ptr
} else {
ecx.get_alloc_bytes_unchecked_raw(alloc_id)?
this.get_alloc_bytes_unchecked_raw(alloc_id)?
}
}
AllocKind::Function | AllocKind::VTable => {
@ -203,11 +203,15 @@ fn addr_from_alloc_id_uncached(
return interp_ok(base_ptr.expose_provenance().try_into().unwrap());
}
// We are not in native lib mode, so we control the addresses ourselves.
if let Some((reuse_addr, clock)) =
global_state.reuse.take_addr(&mut *rng, info.size, info.align, memory_kind, ecx.active_thread())
{
if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
&mut *rng,
info.size,
info.align,
memory_kind,
this.active_thread(),
) {
if let Some(clock) = clock {
ecx.acquire_clock(&clock);
this.acquire_clock(&clock);
}
interp_ok(reuse_addr)
} else {
@ -230,7 +234,7 @@ fn addr_from_alloc_id_uncached(
.checked_add(max(info.size.bytes(), 1))
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
// Even if `Size` didn't overflow, we might still have filled up the address space.
if global_state.next_base_addr > ecx.target_usize_max() {
if global_state.next_base_addr > this.target_usize_max() {
throw_exhaust!(AddressSpaceFull);
}
@ -243,8 +247,8 @@ fn addr_from_alloc_id(
alloc_id: AllocId,
memory_kind: MemoryKind,
) -> InterpResult<'tcx, u64> {
let ecx = self.eval_context_ref();
let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
let this = self.eval_context_ref();
let mut global_state = this.machine.alloc_addresses.borrow_mut();
let global_state = &mut *global_state;
match global_state.base_addr.get(&alloc_id) {
@ -283,22 +287,22 @@ fn addr_from_alloc_id(
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let ecx = self.eval_context_mut();
let global_state = ecx.machine.alloc_addresses.get_mut();
let this = self.eval_context_mut();
let global_state = this.machine.alloc_addresses.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode == ProvenanceMode::Strict {
return interp_ok(());
}
// Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation
// via int2ptr.
if !ecx.is_alloc_live(alloc_id) {
if !this.is_alloc_live(alloc_id) {
return interp_ok(());
}
trace!("Exposing allocation id {alloc_id:?}");
let global_state = ecx.machine.alloc_addresses.get_mut();
let global_state = this.machine.alloc_addresses.get_mut();
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
if this.machine.borrow_tracker.is_some() {
this.expose_tag(alloc_id, tag)?;
}
interp_ok(())
}
@ -306,8 +310,8 @@ fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer> {
trace!("Casting {:#x} to a pointer", addr);
let ecx = self.eval_context_ref();
let global_state = ecx.machine.alloc_addresses.borrow();
let this = self.eval_context_ref();
let global_state = this.machine.alloc_addresses.borrow();
// Potentially emit a warning.
match global_state.provenance_mode {
@ -319,9 +323,9 @@ fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer> {
}
PAST_WARNINGS.with_borrow_mut(|past_warnings| {
let first = past_warnings.is_empty();
if past_warnings.insert(ecx.cur_span()) {
if past_warnings.insert(this.cur_span()) {
// Newly inserted, so first time we see this span.
ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
this.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
}
});
}
@ -347,19 +351,19 @@ fn adjust_alloc_root_pointer(
tag: BorTag,
kind: MemoryKind,
) -> InterpResult<'tcx, interpret::Pointer<Provenance>> {
let ecx = self.eval_context_ref();
let this = self.eval_context_ref();
let (prov, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
let alloc_id = prov.alloc_id();
// Get a pointer to the beginning of this allocation.
let base_addr = ecx.addr_from_alloc_id(alloc_id, kind)?;
let base_addr = this.addr_from_alloc_id(alloc_id, kind)?;
let base_ptr = interpret::Pointer::new(
Provenance::Concrete { alloc_id, tag },
Size::from_bytes(base_addr),
);
// Add offset with the right kind of pointer-overflowing arithmetic.
interp_ok(base_ptr.wrapping_offset(offset, ecx))
interp_ok(base_ptr.wrapping_offset(offset, this))
}
// This returns some prepared `MiriAllocBytes`, either because `addr_from_alloc_id` reserved
@ -371,16 +375,16 @@ fn get_global_alloc_bytes(
bytes: &[u8],
align: Align,
) -> InterpResult<'tcx, MiriAllocBytes> {
let ecx = self.eval_context_ref();
if ecx.machine.native_lib.is_some() {
let this = self.eval_context_ref();
if this.machine.native_lib.is_some() {
// In native lib mode, MiriAllocBytes for global allocations are handled via `prepared_alloc_bytes`.
// This additional call ensures that some `MiriAllocBytes` are always prepared, just in case
// this function gets called before the first time `addr_from_alloc_id` gets called.
ecx.addr_from_alloc_id(id, kind)?;
this.addr_from_alloc_id(id, kind)?;
// The memory we need here will have already been allocated during an earlier call to
// `addr_from_alloc_id` for this allocation. So don't create a new `MiriAllocBytes` here, instead
// fetch the previously prepared bytes from `prepared_alloc_bytes`.
let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
let mut global_state = this.machine.alloc_addresses.borrow_mut();
let mut prepared_alloc_bytes = global_state
.prepared_alloc_bytes
.remove(&id)
@ -403,7 +407,7 @@ fn ptr_get_alloc(
ptr: interpret::Pointer<Provenance>,
size: i64,
) -> Option<(AllocId, Size)> {
let ecx = self.eval_context_ref();
let this = self.eval_context_ref();
let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)
@ -411,15 +415,15 @@ fn ptr_get_alloc(
alloc_id
} else {
// A wildcard pointer.
ecx.alloc_id_from_addr(addr.bytes(), size)?
this.alloc_id_from_addr(addr.bytes(), size)?
};
// This cannot fail: since we already have a pointer with that provenance, adjust_alloc_root_pointer
// must have been called in the past, so we can just look up the address in the map.
let base_addr = *ecx.machine.alloc_addresses.borrow().base_addr.get(&alloc_id).unwrap();
let base_addr = *this.machine.alloc_addresses.borrow().base_addr.get(&alloc_id).unwrap();
// Wrapping "addr - base_addr"
let rel_offset = ecx.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr));
let rel_offset = this.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr));
Some((alloc_id, Size::from_bytes(rel_offset)))
}
}

View File

@ -354,7 +354,7 @@ pub fn get(&self, idx: usize) -> Option<Item> {
self.borrows.get(idx).cloned()
}
#[allow(clippy::len_without_is_empty)] // Stacks are never empty
#[expect(clippy::len_without_is_empty)] // Stacks are never empty
pub fn len(&self) -> usize {
self.borrows.len()
}

View File

@ -1,6 +1,8 @@
use std::cell::RefCell;
use std::collections::VecDeque;
use std::collections::hash_map::Entry;
use std::ops::Not;
use std::rc::Rc;
use std::time::Duration;
use rustc_abi::Size;
@ -121,6 +123,15 @@ struct Futex {
clock: VClock,
}
#[derive(Default, Clone)]
pub struct FutexRef(Rc<RefCell<Futex>>);
impl VisitProvenance for FutexRef {
fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
// No provenance in `Futex`.
}
}
/// A thread waiting on a futex.
#[derive(Debug)]
struct FutexWaiter {
@ -137,9 +148,6 @@ pub struct SynchronizationObjects {
rwlocks: IndexVec<RwLockId, RwLock>,
condvars: IndexVec<CondvarId, Condvar>,
pub(super) init_onces: IndexVec<InitOnceId, InitOnce>,
/// Futex info for the futex at the given address.
futexes: FxHashMap<u64, Futex>,
}
// Private extension trait for local helper methods
@ -184,7 +192,7 @@ pub fn init_once_create(&mut self) -> InitOnceId {
}
impl<'tcx> AllocExtra<'tcx> {
pub fn get_sync<T: 'static>(&self, offset: Size) -> Option<&T> {
fn get_sync<T: 'static>(&self, offset: Size) -> Option<&T> {
self.sync.get(&offset).and_then(|s| s.downcast_ref::<T>())
}
}
@ -273,27 +281,32 @@ fn lazy_sync_get_data<T: 'static + Copy>(
/// Get the synchronization primitive associated with the given pointer,
/// or initialize a new one.
///
/// Return `None` if this pointer does not point to at least 1 byte of mutable memory.
fn get_sync_or_init<'a, T: 'static>(
&'a mut self,
ptr: Pointer,
new: impl FnOnce(&'a mut MiriMachine<'tcx>) -> InterpResult<'tcx, T>,
) -> InterpResult<'tcx, &'a T>
new: impl FnOnce(&'a mut MiriMachine<'tcx>) -> T,
) -> Option<&'a T>
where
'tcx: 'a,
{
let this = self.eval_context_mut();
// Ensure there is memory behind this pointer, so that this allocation
// is truly the only place where the data could be stored.
this.check_ptr_access(ptr, Size::from_bytes(1), CheckInAllocMsg::InboundsTest)?;
let (alloc, offset, _) = this.ptr_get_alloc_id(ptr, 0)?;
let (alloc_extra, machine) = this.get_alloc_extra_mut(alloc)?;
if !this.ptr_try_get_alloc_id(ptr, 0).ok().is_some_and(|(alloc_id, offset, ..)| {
let info = this.get_alloc_info(alloc_id);
info.kind == AllocKind::LiveData && info.mutbl.is_mut() && offset < info.size
}) {
return None;
}
// This cannot fail now.
let (alloc, offset, _) = this.ptr_get_alloc_id(ptr, 0).unwrap();
let (alloc_extra, machine) = this.get_alloc_extra_mut(alloc).unwrap();
// Due to borrow checker reasons, we have to do the lookup twice.
if alloc_extra.get_sync::<T>(offset).is_none() {
let new = new(machine)?;
let new = new(machine);
alloc_extra.sync.insert(offset, Box::new(new));
}
interp_ok(alloc_extra.get_sync::<T>(offset).unwrap())
Some(alloc_extra.get_sync::<T>(offset).unwrap())
}
#[inline]
@ -690,33 +703,35 @@ fn condvar_signal(&mut self, id: CondvarId) -> InterpResult<'tcx, bool> {
/// On a timeout, `retval_timeout` is written to `dest` and `errno_timeout` is set as the last error.
fn futex_wait(
&mut self,
addr: u64,
futex_ref: FutexRef,
bitset: u32,
timeout: Option<(TimeoutClock, TimeoutAnchor, Duration)>,
retval_succ: Scalar,
retval_timeout: Scalar,
dest: MPlaceTy<'tcx>,
errno_timeout: Scalar,
errno_timeout: IoError,
) {
let this = self.eval_context_mut();
let thread = this.active_thread();
let futex = &mut this.machine.sync.futexes.entry(addr).or_default();
let mut futex = futex_ref.0.borrow_mut();
let waiters = &mut futex.waiters;
assert!(waiters.iter().all(|waiter| waiter.thread != thread), "thread is already waiting");
waiters.push_back(FutexWaiter { thread, bitset });
drop(futex);
this.block_thread(
BlockReason::Futex { addr },
BlockReason::Futex,
timeout,
callback!(
@capture<'tcx> {
addr: u64,
futex_ref: FutexRef,
retval_succ: Scalar,
retval_timeout: Scalar,
dest: MPlaceTy<'tcx>,
errno_timeout: Scalar,
errno_timeout: IoError,
}
@unblock = |this| {
let futex = this.machine.sync.futexes.get(&addr).unwrap();
let futex = futex_ref.0.borrow();
// Acquire the clock of the futex.
if let Some(data_race) = &this.machine.data_race {
data_race.acquire_clock(&futex.clock, &this.machine.threads);
@ -728,7 +743,7 @@ fn futex_wait(
@timeout = |this| {
// Remove the waiter from the futex.
let thread = this.active_thread();
let futex = this.machine.sync.futexes.get_mut(&addr).unwrap();
let mut futex = futex_ref.0.borrow_mut();
futex.waiters.retain(|waiter| waiter.thread != thread);
// Set errno and write return value.
this.set_last_error(errno_timeout)?;
@ -739,12 +754,11 @@ fn futex_wait(
);
}
/// Wake up the first thread in the queue that matches any of the bits in the bitset.
/// Returns whether anything was woken.
fn futex_wake(&mut self, addr: u64, bitset: u32) -> InterpResult<'tcx, bool> {
fn futex_wake(&mut self, futex_ref: &FutexRef, bitset: u32) -> InterpResult<'tcx, bool> {
let this = self.eval_context_mut();
let Some(futex) = this.machine.sync.futexes.get_mut(&addr) else {
return interp_ok(false);
};
let mut futex = futex_ref.0.borrow_mut();
let data_race = &this.machine.data_race;
// Each futex-wake happens-before the end of the futex wait
@ -757,7 +771,8 @@ fn futex_wake(&mut self, addr: u64, bitset: u32) -> InterpResult<'tcx, bool> {
return interp_ok(false);
};
let waiter = futex.waiters.remove(i).unwrap();
this.unblock_thread(waiter.thread, BlockReason::Futex { addr })?;
drop(futex);
this.unblock_thread(waiter.thread, BlockReason::Futex)?;
interp_ok(true)
}
}

View File

@ -1,7 +1,6 @@
//! Implements threads.
use std::mem;
use std::num::TryFromIntError;
use std::sync::atomic::Ordering::Relaxed;
use std::task::Poll;
use std::time::{Duration, SystemTime};
@ -127,26 +126,6 @@ fn index(self) -> usize {
}
}
impl TryFrom<u64> for ThreadId {
type Error = TryFromIntError;
fn try_from(id: u64) -> Result<Self, Self::Error> {
u32::try_from(id).map(Self)
}
}
impl TryFrom<i128> for ThreadId {
type Error = TryFromIntError;
fn try_from(id: i128) -> Result<Self, Self::Error> {
u32::try_from(id).map(Self)
}
}
impl From<u32> for ThreadId {
fn from(id: u32) -> Self {
Self(id)
}
}
impl From<ThreadId> for u64 {
fn from(t: ThreadId) -> Self {
t.0.into()
@ -168,7 +147,7 @@ pub enum BlockReason {
/// Blocked on a reader-writer lock.
RwLock(RwLockId),
/// Blocked on a Futex variable.
Futex { addr: u64 },
Futex,
/// Blocked on an InitOnce.
InitOnce(InitOnceId),
/// Blocked on epoll.
@ -448,6 +427,10 @@ pub enum TimeoutAnchor {
Absolute,
}
/// An error signaling that the requested thread doesn't exist.
#[derive(Debug, Copy, Clone)]
pub struct ThreadNotFound;
/// A set of threads.
#[derive(Debug)]
pub struct ThreadManager<'tcx> {
@ -509,6 +492,16 @@ pub(crate) fn init(
}
}
pub fn thread_id_try_from(&self, id: impl TryInto<u32>) -> Result<ThreadId, ThreadNotFound> {
if let Ok(id) = id.try_into()
&& usize::try_from(id).is_ok_and(|id| id < self.threads.len())
{
Ok(ThreadId(id))
} else {
Err(ThreadNotFound)
}
}
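The new `thread_id_try_from` helper replaces the `TryFrom` impls removed above and additionally checks that the ID names an existing thread, so callers can report an error (e.g. `ESRCH`) instead of unwrapping. A rough standalone sketch of the check it performs (simplified stand-in names, not Miri's actual types):

```rust
/// Simplified stand-in for Miri's `ThreadNotFound` error.
#[derive(Debug)]
struct ThreadNotFound;

/// A value names an existing thread only if it fits in `u32` *and* indexes
/// into the current thread list; anything else becomes an error instead of
/// a caller-side `unwrap()`/`expect()`.
fn thread_id_try_from(id: impl TryInto<u32>, num_threads: usize) -> Result<u32, ThreadNotFound> {
    match id.try_into() {
        Ok(id) if usize::try_from(id).is_ok_and(|idx| idx < num_threads) => Ok(id),
        _ => Err(ThreadNotFound),
    }
}

fn main() {
    assert!(thread_id_try_from(0u64, 1).is_ok());
    assert!(thread_id_try_from(u64::MAX, 1).is_err()); // does not fit in u32: error, no panic
    assert!(thread_id_try_from(7i128, 1).is_err()); // fits in u32, but no such thread
}
```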
/// Check if we have an allocation for the given thread local static for the
/// active thread.
fn get_thread_local_alloc_id(&self, def_id: DefId) -> Option<StrictPointer> {
@ -534,6 +527,7 @@ pub fn active_thread_stack_mut(
) -> &mut Vec<Frame<'tcx, Provenance, FrameExtra<'tcx>>> {
&mut self.threads[self.active_thread].stack
}
pub fn all_stacks(
&self,
) -> impl Iterator<Item = (ThreadId, &[Frame<'tcx, Provenance, FrameExtra<'tcx>>])> {
@ -868,6 +862,11 @@ fn run_on_stack_empty(&mut self) -> InterpResult<'tcx, Poll<()>> {
// Public interface to thread management.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
#[inline]
fn thread_id_try_from(&self, id: impl TryInto<u32>) -> Result<ThreadId, ThreadNotFound> {
self.eval_context_ref().machine.threads.thread_id_try_from(id)
}
/// Get a thread-specific allocation id for the given thread-local static.
/// If needed, allocate a new one.
fn get_or_create_thread_local_alloc(
@ -1160,8 +1159,7 @@ fn active_thread_stack_mut<'a>(
/// Set the name of the current thread. The buffer must not include the null terminator.
#[inline]
fn set_thread_name(&mut self, thread: ThreadId, new_thread_name: Vec<u8>) {
let this = self.eval_context_mut();
this.machine.threads.set_thread_name(thread, new_thread_name);
self.eval_context_mut().machine.threads.set_thread_name(thread, new_thread_name);
}
#[inline]

View File

@ -300,7 +300,6 @@ fn buffered_write(
interp_ok(())
}
#[allow(clippy::if_same_then_else, clippy::needless_bool)]
/// Selects a valid store element in the buffer.
fn fetch_store<R: rand::Rng + ?Sized>(
&self,

View File

@ -423,7 +423,7 @@ pub fn create_ecx<'tcx>(
/// Evaluates the entry function specified by `entry_id`.
/// Returns `Some(return_code)` if the program completed execution.
/// Returns `None` if an evaluation error occurred.
#[allow(clippy::needless_lifetimes)]
#[expect(clippy::needless_lifetimes)]
pub fn eval_entry<'tcx>(
tcx: TyCtxt<'tcx>,
entry_id: DefId,

View File

@ -156,7 +156,7 @@ pub fn iter_exported_symbols<'tcx>(
for cnum in dependency_format.1.iter().enumerate().filter_map(|(num, &linkage)| {
// We add 1 to the number because that's what rustc also does everywhere it
// calls `CrateNum::new`...
#[allow(clippy::arithmetic_side_effects)]
#[expect(clippy::arithmetic_side_effects)]
(linkage != Linkage::NotLinked).then_some(CrateNum::new(num + 1))
}) {
// We can ignore `_export_info` here: we are a Rust crate, and everything is exported

View File

@ -292,7 +292,6 @@ fn emulate_intrinsic_by_name(
let b = this.read_scalar(b)?.to_f32()?;
let c = this.read_scalar(c)?.to_f32()?;
let fuse: bool = this.machine.rng.get_mut().gen();
#[allow(clippy::arithmetic_side_effects)] // float ops don't overflow
let res = if fuse {
// FIXME: Using host floats, to work around https://github.com/rust-lang/rustc_apfloat/issues/11
a.to_host().mul_add(b.to_host(), c.to_host()).to_soft()
@ -308,7 +307,6 @@ fn emulate_intrinsic_by_name(
let b = this.read_scalar(b)?.to_f64()?;
let c = this.read_scalar(c)?.to_f64()?;
let fuse: bool = this.machine.rng.get_mut().gen();
#[allow(clippy::arithmetic_side_effects)] // float ops don't overflow
let res = if fuse {
// FIXME: Using host floats, to work around https://github.com/rust-lang/rustc_apfloat/issues/11
a.to_host().mul_add(b.to_host(), c.to_host()).to_soft()

View File

@ -750,7 +750,6 @@ enum Op {
let val = if simd_element_to_bool(mask)? {
// Size * u64 is implemented as always checked
#[allow(clippy::arithmetic_side_effects)]
let ptr = ptr.wrapping_offset(dest.layout.size * i, this);
let place = this.ptr_to_mplace(ptr, dest.layout);
this.read_immediate(&place)?
@ -774,7 +773,6 @@ enum Op {
if simd_element_to_bool(mask)? {
// Size * u64 is implemented as always checked
#[allow(clippy::arithmetic_side_effects)]
let ptr = ptr.wrapping_offset(val.layout.size * i, this);
let place = this.ptr_to_mplace(ptr, val.layout);
this.write_immediate(*val, &place)?
@ -831,7 +829,7 @@ fn simd_bitmask_index(idx: u32, vec_len: u32, endianness: Endian) -> u32 {
assert!(idx < vec_len);
match endianness {
Endian::Little => idx,
#[allow(clippy::arithmetic_side_effects)] // idx < vec_len
#[expect(clippy::arithmetic_side_effects)] // idx < vec_len
Endian::Big => vec_len - 1 - idx, // reverse order of bits
}
}

View File

@ -89,6 +89,18 @@ fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
}
}
impl VisitProvenance for IoError {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
use crate::shims::io_error::IoError::*;
match self {
LibcError(_name) => (),
WindowsError(_name) => (),
HostError(_io_error) => (),
Raw(scalar) => scalar.visit_provenance(visit),
}
}
}
impl VisitProvenance for Immediate<Provenance> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
match self {

View File

@ -21,7 +21,7 @@
#[derive(Debug, Copy, Clone)]
pub struct DynSym(Symbol);
#[allow(clippy::should_implement_trait)]
#[expect(clippy::should_implement_trait)]
impl DynSym {
pub fn from_str(name: &str) -> Self {
DynSym(Symbol::intern(name))
@ -648,7 +648,7 @@ fn emulate_foreign_item_inner(
let val = this.read_scalar(val)?.to_i32()?;
let num = this.read_target_usize(num)?;
// The docs say val is "interpreted as unsigned char".
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[expect(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
let val = val as u8;
// C requires that this must always be a valid pointer (C18 §7.1.4).
@ -661,7 +661,7 @@ fn emulate_foreign_item_inner(
.position(|&c| c == val)
{
let idx = u64::try_from(idx).unwrap();
#[allow(clippy::arithmetic_side_effects)] // idx < num, so this never wraps
#[expect(clippy::arithmetic_side_effects)] // idx < num, so this never wraps
let new_ptr = ptr.wrapping_offset(Size::from_bytes(num - idx - 1), this);
this.write_pointer(new_ptr, dest)?;
} else {
@ -675,7 +675,7 @@ fn emulate_foreign_item_inner(
let val = this.read_scalar(val)?.to_i32()?;
let num = this.read_target_usize(num)?;
// The docs say val is "interpreted as unsigned char".
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[expect(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
let val = val as u8;
// C requires that this must always be a valid pointer (C18 §7.1.4).

View File

@ -7,6 +7,7 @@
#[derive(Debug)]
pub enum IoError {
LibcError(&'static str),
WindowsError(&'static str),
HostError(io::Error),
Raw(Scalar),
}
@ -113,6 +114,7 @@ fn set_last_error(&mut self, err: impl Into<IoError>) -> InterpResult<'tcx> {
let errno = match err.into() {
HostError(err) => this.io_error_to_errnum(err)?,
LibcError(name) => this.eval_libc(name),
WindowsError(name) => this.eval_windows("c", name),
Raw(val) => val,
};
let errno_place = this.last_error_place()?;
@ -186,7 +188,7 @@ fn io_error_to_errnum(&self, err: std::io::Error) -> InterpResult<'tcx, Scalar>
}
/// The inverse of `io_error_to_errnum`.
#[allow(clippy::needless_return)]
#[expect(clippy::needless_return)]
fn try_errnum_to_io_error(
&self,
errnum: Scalar,

View File

@ -53,7 +53,7 @@ fn default() -> Self {
impl<'tcx> TlsData<'tcx> {
/// Generate a new TLS key with the given destructor.
/// `max_size` determines the integer size the key has to fit in.
#[allow(clippy::arithmetic_side_effects)]
#[expect(clippy::arithmetic_side_effects)]
pub fn create_tls_key(
&mut self,
dtor: Option<ty::Instance<'tcx>>,

View File

@ -2,6 +2,7 @@
use rustc_span::Symbol;
use crate::shims::unix::android::thread::prctl;
use crate::shims::unix::linux::syscall::syscall;
use crate::*;
pub fn is_dyn_sym(_name: &str) -> bool {
@ -26,6 +27,9 @@ fn emulate_foreign_item_inner(
this.write_scalar(errno_place.to_ref(this).to_scalar(), dest)?;
}
// Dynamically invoked syscalls
"syscall" => syscall(this, link_name, abi, args, dest)?,
// Threading
"prctl" => prctl(this, link_name, abi, args, dest)?,

View File

@ -2,7 +2,7 @@
use rustc_span::Symbol;
use crate::helpers::check_min_arg_count;
use crate::shims::unix::thread::EvalContextExt as _;
use crate::shims::unix::thread::{EvalContextExt as _, ThreadNameResult};
use crate::*;
const TASK_COMM_LEN: usize = 16;
@ -32,7 +32,7 @@ pub fn prctl<'tcx>(
// https://www.man7.org/linux/man-pages/man2/PR_SET_NAME.2const.html
let res =
this.pthread_setname_np(thread, name, TASK_COMM_LEN, /* truncate */ true)?;
assert!(res);
assert_eq!(res, ThreadNameResult::Ok);
Scalar::from_u32(0)
}
op if op == pr_get_name => {
@ -46,7 +46,7 @@ pub fn prctl<'tcx>(
CheckInAllocMsg::MemoryAccessTest,
)?;
let res = this.pthread_getname_np(thread, name, len, /* truncate*/ false)?;
assert!(res);
assert_eq!(res, ThreadNameResult::Ok);
Scalar::from_u32(0)
}
op => throw_unsup_format!("Miri does not support `prctl` syscall with op={}", op),

View File

@ -603,13 +603,13 @@ fn emulate_foreign_item_inner(
}
"pthread_join" => {
let [thread, retval] = this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
this.pthread_join(thread, retval)?;
this.write_null(dest)?;
let res = this.pthread_join(thread, retval)?;
this.write_scalar(res, dest)?;
}
"pthread_detach" => {
let [thread] = this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
this.pthread_detach(thread)?;
this.write_null(dest)?;
let res = this.pthread_detach(thread)?;
this.write_scalar(res, dest)?;
}
"pthread_self" => {
let [] = this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;

View File

@ -385,7 +385,7 @@ pub struct DirTable {
}
impl DirTable {
#[allow(clippy::arithmetic_side_effects)]
#[expect(clippy::arithmetic_side_effects)]
fn insert_new(&mut self, read_dir: ReadDir) -> u64 {
let id = self.next_id;
self.next_id += 1;

View File

@ -4,8 +4,7 @@
use self::shims::unix::linux::epoll::EvalContextExt as _;
use self::shims::unix::linux::eventfd::EvalContextExt as _;
use self::shims::unix::linux::mem::EvalContextExt as _;
use self::shims::unix::linux::sync::futex;
use crate::helpers::check_min_arg_count;
use self::shims::unix::linux::syscall::syscall;
use crate::machine::{SIGRTMAX, SIGRTMIN};
use crate::shims::unix::*;
use crate::*;
@ -82,13 +81,17 @@ fn emulate_foreign_item_inner(
"pthread_setname_np" => {
let [thread, name] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
let res = this.pthread_setname_np(
let res = match this.pthread_setname_np(
this.read_scalar(thread)?,
this.read_scalar(name)?,
TASK_COMM_LEN,
/* truncate */ false,
)?;
let res = if res { Scalar::from_u32(0) } else { this.eval_libc("ERANGE") };
)? {
ThreadNameResult::Ok => Scalar::from_u32(0),
ThreadNameResult::NameTooLong => this.eval_libc("ERANGE"),
// Act like we failed to open `/proc/self/task/$tid/comm`.
ThreadNameResult::ThreadNotFound => this.eval_libc("ENOENT"),
};
this.write_scalar(res, dest)?;
}
"pthread_getname_np" => {
@ -98,14 +101,18 @@ fn emulate_foreign_item_inner(
// In case of glibc, the length of the output buffer must
// be not shorter than TASK_COMM_LEN.
let len = this.read_scalar(len)?;
let res = if len.to_target_usize(this)? >= TASK_COMM_LEN as u64
&& this.pthread_getname_np(
let res = if len.to_target_usize(this)? >= TASK_COMM_LEN as u64 {
match this.pthread_getname_np(
this.read_scalar(thread)?,
this.read_scalar(name)?,
len,
/* truncate*/ false,
)? {
Scalar::from_u32(0)
ThreadNameResult::Ok => Scalar::from_u32(0),
ThreadNameResult::NameTooLong => unreachable!(),
// Act like we failed to open `/proc/self/task/$tid/comm`.
ThreadNameResult::ThreadNotFound => this.eval_libc("ENOENT"),
}
} else {
this.eval_libc("ERANGE")
};
@ -119,57 +126,7 @@ fn emulate_foreign_item_inner(
// Dynamically invoked syscalls
"syscall" => {
// We do not use `check_shim` here because `syscall` is variadic. The argument
// count is checked below.
this.check_abi_and_shim_symbol_clash(
abi,
ExternAbi::C { unwind: false },
link_name,
)?;
// The syscall variadic function is legal to call with more arguments than needed,
// extra arguments are simply ignored. The important check is that when we use an
// argument, we have to also check all arguments *before* it to ensure that they
// have the right type.
let sys_getrandom = this.eval_libc("SYS_getrandom").to_target_usize(this)?;
let sys_futex = this.eval_libc("SYS_futex").to_target_usize(this)?;
let sys_eventfd2 = this.eval_libc("SYS_eventfd2").to_target_usize(this)?;
let [op] = check_min_arg_count("syscall", args)?;
match this.read_target_usize(op)? {
// `libc::syscall(NR_GETRANDOM, buf.as_mut_ptr(), buf.len(), GRND_NONBLOCK)`
// is called if a `HashMap` is created the regular way (e.g. HashMap<K, V>).
num if num == sys_getrandom => {
// Used by getrandom 0.1
// The first argument is the syscall id, so skip over it.
let [_, ptr, len, flags] =
check_min_arg_count("syscall(SYS_getrandom, ...)", args)?;
let ptr = this.read_pointer(ptr)?;
let len = this.read_target_usize(len)?;
// The only supported flags are GRND_RANDOM and GRND_NONBLOCK,
// neither of which have any effect on our current PRNG.
// See <https://github.com/rust-lang/rust/pull/79196> for a discussion of argument sizes.
let _flags = this.read_scalar(flags)?.to_i32()?;
this.gen_random(ptr, len)?;
this.write_scalar(Scalar::from_target_usize(len, this), dest)?;
}
// `futex` is used by some synchronization primitives.
num if num == sys_futex => {
futex(this, args, dest)?;
}
num if num == sys_eventfd2 => {
let [_, initval, flags] =
check_min_arg_count("syscall(SYS_evetfd2, ...)", args)?;
let result = this.eventfd(initval, flags)?;
this.write_int(result.to_i32()?, dest)?;
}
num => {
throw_unsup_format!("syscall: unsupported syscall number {num}");
}
}
syscall(this, link_name, abi, args, dest)?;
}
// Miscellaneous

View File

@ -22,7 +22,7 @@ fn mremap(
let flags = this.read_scalar(flags)?.to_i32()?;
// old_address must be a multiple of the page size
#[allow(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero
#[expect(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero
if old_address.addr().bytes() % this.machine.page_size != 0 || new_size == 0 {
this.set_last_error(LibcError("EINVAL"))?;
return interp_ok(this.eval_libc("MAP_FAILED"));

View File

@ -3,3 +3,4 @@
pub mod foreign_items;
pub mod mem;
pub mod sync;
pub mod syscall;

View File

@ -1,6 +1,11 @@
use crate::concurrency::sync::FutexRef;
use crate::helpers::check_min_arg_count;
use crate::*;
struct LinuxFutex {
futex: FutexRef,
}
/// Implementation of the SYS_futex syscall.
/// `args` is the arguments *including* the syscall number.
pub fn futex<'tcx>(
@ -27,7 +32,6 @@ pub fn futex<'tcx>(
// This is a vararg function so we have to bring our own type for this pointer.
let addr = this.ptr_to_mplace(addr, this.machine.layouts.i32);
let addr_usize = addr.ptr().addr().bytes();
let futex_private = this.eval_libc_i32("FUTEX_PRIVATE_FLAG");
let futex_wait = this.eval_libc_i32("FUTEX_WAIT");
@ -63,8 +67,7 @@ pub fn futex<'tcx>(
};
if bitset == 0 {
this.set_last_error_and_return(LibcError("EINVAL"), dest)?;
return interp_ok(());
return this.set_last_error_and_return(LibcError("EINVAL"), dest);
}
let timeout = this.deref_pointer_as(timeout, this.libc_ty_layout("timespec"))?;
@ -99,19 +102,18 @@ pub fn futex<'tcx>(
// effects of this and the other thread are correctly observed,
// otherwise we will deadlock.
//
// There are two scenarios to consider:
// 1. If we (FUTEX_WAIT) execute first, we'll push ourselves into
// the waiters queue and go to sleep. They (addr write & FUTEX_WAKE)
// will see us in the queue and wake us up.
// 2. If they (addr write & FUTEX_WAKE) execute first, we must observe
// addr's new value. If we see an outdated value that happens to equal
// the expected val, then we'll put ourselves to sleep with no one to wake us
// up, so we end up with a deadlock. This is prevented by having a SeqCst
// fence inside FUTEX_WAKE syscall, and another SeqCst fence
// below, the atomic read on addr after the SeqCst fence is guaranteed
// not to see any value older than the addr write immediately before
// calling FUTEX_WAKE. We'll see futex_val != val and return without
// sleeping.
// There are two scenarios to consider, depending on whether WAIT or WAKE goes first:
// 1. If we (FUTEX_WAIT) execute first, we'll push ourselves into the waiters queue and
// go to sleep. They (FUTEX_WAKE) will see us in the queue and wake us up. It doesn't
// matter how the addr write is ordered.
// 2. If they (FUTEX_WAKE) execute first, that means the addr write is also before us
// (FUTEX_WAIT). It is crucial that we observe addr's new value. If we see an
// outdated value that happens to equal the expected val, then we'll put ourselves to
// sleep with no one to wake us up, so we end up with a deadlock. This is prevented
// by having a SeqCst fence inside FUTEX_WAKE syscall, and another SeqCst fence here
// in FUTEX_WAIT. The atomic read on addr after the SeqCst fence is guaranteed not to
// see any value older than the addr write immediately before calling FUTEX_WAKE.
// We'll see futex_val != val and return without sleeping.
//
// Note that the fences do not create any happens-before relationship.
// The read sees the write immediately before the fence not because
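As a minimal sketch of the wait/wake protocol the comment above reasons about, with a plain `AtomicI32` standing in for the futex word and `fence` for the SeqCst fences Miri inserts (illustration only, not code from this diff):

```rust
use std::sync::atomic::{fence, AtomicI32, Ordering};

/// FUTEX_WAIT side: only go to sleep if the word still holds the expected value.
/// The SeqCst fence pairs with the one on the wake side; if the waker's store has
/// already happened, the load below sees the new value and we skip sleeping.
fn wait_side(word: &AtomicI32, expected: i32) {
    fence(Ordering::SeqCst);
    if word.load(Ordering::Acquire) == expected {
        // futex_wait(word, expected): block until a wake-up arrives
    } else {
        // value already changed: return immediately, no lost wake-up possible
    }
}

/// FUTEX_WAKE side: publish the state change, then wake.
fn wake_side(word: &AtomicI32) {
    word.store(1, Ordering::Relaxed);
    fence(Ordering::SeqCst);
    // futex_wake(word): wake one waiter, if any is queued
}

fn main() {
    let word = AtomicI32::new(0);
    wake_side(&word); // scenario 2: the wake executes first...
    wait_side(&word, 0); // ...so the waiter sees 1 != 0 and does not block
}
```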
@ -140,17 +142,28 @@ pub fn futex<'tcx>(
this.atomic_fence(AtomicFenceOrd::SeqCst)?;
// Read an `i32` through the pointer, regardless of any wrapper types.
// It's not uncommon for `addr` to be passed as another type than `*mut i32`, such as `*const AtomicI32`.
let futex_val = this.read_scalar_atomic(&addr, AtomicReadOrd::Relaxed)?.to_i32()?;
// We do an acquire read -- it only seems reasonable that if we observe a value here, we
// actually establish an ordering with that value.
let futex_val = this.read_scalar_atomic(&addr, AtomicReadOrd::Acquire)?.to_i32()?;
if val == futex_val {
// The value still matches, so we block the thread and make it wait for FUTEX_WAKE.
// This cannot fail since we already did an atomic acquire read on that pointer.
// Acquire reads are only allowed on mutable memory.
let futex_ref = this
.get_sync_or_init(addr.ptr(), |_| LinuxFutex { futex: Default::default() })
.unwrap()
.futex
.clone();
this.futex_wait(
addr_usize,
futex_ref,
bitset,
timeout,
Scalar::from_target_isize(0, this), // retval_succ
Scalar::from_target_isize(-1, this), // retval_timeout
dest.clone(),
this.eval_libc("ETIMEDOUT"), // errno_timeout
LibcError("ETIMEDOUT"), // errno_timeout
);
} else {
// The futex value doesn't match the expected value, so we return failure
@ -165,6 +178,17 @@ pub fn futex<'tcx>(
// FUTEX_WAKE_BITSET: (int *addr, int op = FUTEX_WAKE, int val, const timespect *_unused, int *_unused, unsigned int bitset)
// Same as FUTEX_WAKE, but allows you to specify a bitset to select which threads to wake up.
op if op == futex_wake || op == futex_wake_bitset => {
let Some(futex_ref) =
this.get_sync_or_init(addr.ptr(), |_| LinuxFutex { futex: Default::default() })
else {
// No AllocId, or no live allocation at that AllocId.
// Return an error code. (That seems nicer than silently doing something non-intuitive.)
// This means that if an address gets reused by a new allocation,
// we'll use an independent futex queue for this... that seems acceptable.
return this.set_last_error_and_return(LibcError("EFAULT"), dest);
};
let futex_ref = futex_ref.futex.clone();
let bitset = if op == futex_wake_bitset {
let [_, _, _, _, timeout, uaddr2, bitset] =
check_min_arg_count("`syscall(SYS_futex, FUTEX_WAKE_BITSET, ...)`", args)?;
@ -182,9 +206,9 @@ pub fn futex<'tcx>(
// before doing the syscall.
this.atomic_fence(AtomicFenceOrd::SeqCst)?;
let mut n = 0;
#[allow(clippy::arithmetic_side_effects)]
#[expect(clippy::arithmetic_side_effects)]
for _ in 0..val {
if this.futex_wake(addr_usize, bitset)? {
if this.futex_wake(&futex_ref, bitset)? {
n += 1;
} else {
break;

View File

@ -0,0 +1,63 @@
use rustc_abi::ExternAbi;
use rustc_span::Symbol;
use self::shims::unix::linux::eventfd::EvalContextExt as _;
use crate::helpers::check_min_arg_count;
use crate::shims::unix::linux::sync::futex;
use crate::*;
pub fn syscall<'tcx>(
this: &mut MiriInterpCx<'tcx>,
link_name: Symbol,
abi: ExternAbi,
args: &[OpTy<'tcx>],
dest: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx> {
// We do not use `check_shim` here because `syscall` is variadic. The argument
// count is checked below.
this.check_abi_and_shim_symbol_clash(abi, ExternAbi::C { unwind: false }, link_name)?;
// The syscall variadic function is legal to call with more arguments than needed,
// extra arguments are simply ignored. The important check is that when we use an
// argument, we have to also check all arguments *before* it to ensure that they
// have the right type.
let sys_getrandom = this.eval_libc("SYS_getrandom").to_target_usize(this)?;
let sys_futex = this.eval_libc("SYS_futex").to_target_usize(this)?;
let sys_eventfd2 = this.eval_libc("SYS_eventfd2").to_target_usize(this)?;
let [op] = check_min_arg_count("syscall", args)?;
match this.read_target_usize(op)? {
// `libc::syscall(NR_GETRANDOM, buf.as_mut_ptr(), buf.len(), GRND_NONBLOCK)`
// is called if a `HashMap` is created the regular way (e.g. HashMap<K, V>).
num if num == sys_getrandom => {
// Used by getrandom 0.1
// The first argument is the syscall id, so skip over it.
let [_, ptr, len, flags] = check_min_arg_count("syscall(SYS_getrandom, ...)", args)?;
let ptr = this.read_pointer(ptr)?;
let len = this.read_target_usize(len)?;
// The only supported flags are GRND_RANDOM and GRND_NONBLOCK,
// neither of which have any effect on our current PRNG.
// See <https://github.com/rust-lang/rust/pull/79196> for a discussion of argument sizes.
let _flags = this.read_scalar(flags)?.to_i32()?;
this.gen_random(ptr, len)?;
this.write_scalar(Scalar::from_target_usize(len, this), dest)?;
}
// `futex` is used by some synchronization primitives.
num if num == sys_futex => {
futex(this, args, dest)?;
}
num if num == sys_eventfd2 => {
let [_, initval, flags] = check_min_arg_count("syscall(SYS_eventfd2, ...)", args)?;
let result = this.eventfd(initval, flags)?;
this.write_int(result.to_i32()?, dest)?;
}
num => {
throw_unsup_format!("syscall: unsupported syscall number {num}");
}
};
interp_ok(())
}
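For reference, the guest-side call shape this new `syscall` shim dispatches on looks roughly like the following (assumes the `libc` crate on a Linux target; illustration only, not part of the diff):

```rust
fn main() {
    let mut buf = [0u8; 16];
    // Variadic `syscall` entry point; getrandom 0.1 issues this kind of call,
    // e.g. when std's HashMap seeds its hasher.
    let n = unsafe {
        libc::syscall(
            libc::SYS_getrandom,
            buf.as_mut_ptr(),
            buf.len(),
            libc::GRND_NONBLOCK,
        )
    };
    assert!(n >= 0, "SYS_getrandom failed");
}
```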

View File

@ -181,18 +181,16 @@ fn emulate_foreign_item_inner(
// are met, then the name is set and 0 is returned. Otherwise, if
// the specified name is longer than MAXTHREADNAMESIZE, then
// ENAMETOOLONG is returned.
//
// FIXME: the real implementation maybe returns ESRCH if the thread ID is invalid.
let thread = this.pthread_self()?;
let res = if this.pthread_setname_np(
let res = match this.pthread_setname_np(
thread,
this.read_scalar(name)?,
this.eval_libc("MAXTHREADNAMESIZE").to_target_usize(this)?.try_into().unwrap(),
/* truncate */ false,
)? {
Scalar::from_u32(0)
} else {
this.eval_libc("ENAMETOOLONG")
ThreadNameResult::Ok => Scalar::from_u32(0),
ThreadNameResult::NameTooLong => this.eval_libc("ENAMETOOLONG"),
ThreadNameResult::ThreadNotFound => unreachable!(),
};
// Contrary to the manpage, `pthread_setname_np` on macOS still
// returns an integer indicating success.
@ -210,15 +208,17 @@ fn emulate_foreign_item_inner(
// https://github.com/apple-oss-distributions/libpthread/blob/c032e0b076700a0a47db75528a282b8d3a06531a/src/pthread.c#L1160-L1175.
// The key part is the strlcpy, which truncates the resulting value,
// but always null terminates (except for zero sized buffers).
//
// FIXME: the real implementation returns ESRCH if the thread ID is invalid.
let res = Scalar::from_u32(0);
this.pthread_getname_np(
let res = match this.pthread_getname_np(
this.read_scalar(thread)?,
this.read_scalar(name)?,
this.read_scalar(len)?,
/* truncate */ true,
)?;
)? {
ThreadNameResult::Ok => Scalar::from_u32(0),
// `NameTooLong` is possible when the buffer is zero sized,
ThreadNameResult::NameTooLong => Scalar::from_u32(0),
ThreadNameResult::ThreadNotFound => this.eval_libc("ESRCH"),
};
this.write_scalar(res, dest)?;
}

View File

@ -132,7 +132,7 @@ fn munmap(&mut self, addr: &OpTy<'tcx>, length: &OpTy<'tcx>) -> InterpResult<'tc
// addr must be a multiple of the page size, but apart from that munmap is just implemented
// as a dealloc.
#[allow(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero
#[expect(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero
if addr.addr().bytes() % this.machine.page_size != 0 {
return this.set_last_error_and_return_i32(LibcError("EINVAL"));
}

View File

@ -21,7 +21,7 @@
pub use self::linux::epoll::EpollInterestTable;
pub use self::mem::EvalContextExt as _;
pub use self::sync::EvalContextExt as _;
pub use self::thread::EvalContextExt as _;
pub use self::thread::{EvalContextExt as _, ThreadNameResult};
pub use self::unnamed_socket::EvalContextExt as _;
// Make up some constants.

View File

@ -26,26 +26,33 @@ fn emulate_foreign_item_inner(
// THREAD_NAME_MAX allows a thread name of 31+1 length
// https://github.com/illumos/illumos-gate/blob/7671517e13b8123748eda4ef1ee165c6d9dba7fe/usr/src/uts/common/sys/thread.h#L613
let max_len = 32;
let res = this.pthread_setname_np(
// See https://illumos.org/man/3C/pthread_setname_np for the error codes.
let res = match this.pthread_setname_np(
this.read_scalar(thread)?,
this.read_scalar(name)?,
max_len,
/* truncate */ false,
)?;
let res = if res { Scalar::from_u32(0) } else { this.eval_libc("ERANGE") };
)? {
ThreadNameResult::Ok => Scalar::from_u32(0),
ThreadNameResult::NameTooLong => this.eval_libc("ERANGE"),
ThreadNameResult::ThreadNotFound => this.eval_libc("ESRCH"),
};
this.write_scalar(res, dest)?;
}
"pthread_getname_np" => {
let [thread, name, len] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
// https://github.com/illumos/illumos-gate/blob/c56822be04b6c157c8b6f2281e47214c3b86f657/usr/src/lib/libc/port/threads/thr.c#L2449-L2480
let res = this.pthread_getname_np(
// See https://illumos.org/man/3C/pthread_getname_np for the error codes.
let res = match this.pthread_getname_np(
this.read_scalar(thread)?,
this.read_scalar(name)?,
this.read_scalar(len)?,
/* truncate */ false,
)?;
let res = if res { Scalar::from_u32(0) } else { this.eval_libc("ERANGE") };
)? {
ThreadNameResult::Ok => Scalar::from_u32(0),
ThreadNameResult::NameTooLong => this.eval_libc("ERANGE"),
ThreadNameResult::ThreadNotFound => this.eval_libc("ESRCH"),
};
this.write_scalar(res, dest)?;
}

View File

@ -685,7 +685,6 @@ fn pthread_rwlock_unlock(&mut self, rwlock_op: &OpTy<'tcx>) -> InterpResult<'tcx
let id = rwlock_get_data(this, rwlock_op)?.id;
#[allow(clippy::if_same_then_else)]
if this.rwlock_reader_unlock(id)? || this.rwlock_writer_unlock(id)? {
interp_ok(())
} else {

View File

@ -2,6 +2,13 @@
use crate::*;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ThreadNameResult {
Ok,
NameTooLong,
ThreadNotFound,
}
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
fn pthread_create(
@ -30,7 +37,11 @@ fn pthread_create(
interp_ok(())
}
fn pthread_join(&mut self, thread: &OpTy<'tcx>, retval: &OpTy<'tcx>) -> InterpResult<'tcx, ()> {
fn pthread_join(
&mut self,
thread: &OpTy<'tcx>,
retval: &OpTy<'tcx>,
) -> InterpResult<'tcx, Scalar> {
let this = self.eval_context_mut();
if !this.ptr_is_null(this.read_pointer(retval)?)? {
@ -38,22 +49,26 @@ fn pthread_join(&mut self, thread: &OpTy<'tcx>, retval: &OpTy<'tcx>) -> InterpRe
throw_unsup_format!("Miri supports pthread_join only with retval==NULL");
}
let thread_id = this.read_scalar(thread)?.to_int(this.libc_ty_layout("pthread_t").size)?;
this.join_thread_exclusive(thread_id.try_into().expect("thread ID should fit in u32"))?;
let thread = this.read_scalar(thread)?.to_int(this.libc_ty_layout("pthread_t").size)?;
let Ok(thread) = this.thread_id_try_from(thread) else {
return interp_ok(this.eval_libc("ESRCH"));
};
interp_ok(())
this.join_thread_exclusive(thread)?;
interp_ok(Scalar::from_u32(0))
}
fn pthread_detach(&mut self, thread: &OpTy<'tcx>) -> InterpResult<'tcx, ()> {
fn pthread_detach(&mut self, thread: &OpTy<'tcx>) -> InterpResult<'tcx, Scalar> {
let this = self.eval_context_mut();
let thread_id = this.read_scalar(thread)?.to_int(this.libc_ty_layout("pthread_t").size)?;
this.detach_thread(
thread_id.try_into().expect("thread ID should fit in u32"),
/*allow_terminated_joined*/ false,
)?;
let thread = this.read_scalar(thread)?.to_int(this.libc_ty_layout("pthread_t").size)?;
let Ok(thread) = this.thread_id_try_from(thread) else {
return interp_ok(this.eval_libc("ESRCH"));
};
this.detach_thread(thread, /*allow_terminated_joined*/ false)?;
interp_ok(())
interp_ok(Scalar::from_u32(0))
}
fn pthread_self(&mut self) -> InterpResult<'tcx, Scalar> {
@ -65,18 +80,21 @@ fn pthread_self(&mut self) -> InterpResult<'tcx, Scalar> {
/// Set the name of the specified thread. If the name, including the null terminator,
/// is longer than or equal to `name_max_len`, then if `truncate` is set the truncated name
/// is used as the thread name, otherwise `false` is returned.
/// is used as the thread name, otherwise [`ThreadNameResult::NameTooLong`] is returned.
/// If the specified thread wasn't found, [`ThreadNameResult::ThreadNotFound`] is returned.
fn pthread_setname_np(
&mut self,
thread: Scalar,
name: Scalar,
name_max_len: usize,
truncate: bool,
) -> InterpResult<'tcx, bool> {
) -> InterpResult<'tcx, ThreadNameResult> {
let this = self.eval_context_mut();
let thread = thread.to_int(this.libc_ty_layout("pthread_t").size)?;
let thread = ThreadId::try_from(thread).unwrap();
let Ok(thread) = this.thread_id_try_from(thread) else {
return interp_ok(ThreadNameResult::ThreadNotFound);
};
let name = name.to_pointer(this)?;
let mut name = this.read_c_str(name)?.to_owned();
@ -85,29 +103,32 @@ fn pthread_setname_np(
if truncate {
name.truncate(name_max_len.saturating_sub(1));
} else {
return interp_ok(false);
return interp_ok(ThreadNameResult::NameTooLong);
}
}
this.set_thread_name(thread, name);
interp_ok(true)
interp_ok(ThreadNameResult::Ok)
}
/// Get the name of the specified thread. If the thread name doesn't fit
/// the buffer, then if `truncate` is set the truncated name is written out,
/// otherwise `false` is returned.
/// otherwise [`ThreadNameResult::NameTooLong`] is returned. If the specified
/// thread wasn't found, [`ThreadNameResult::ThreadNotFound`] is returned.
fn pthread_getname_np(
&mut self,
thread: Scalar,
name_out: Scalar,
len: Scalar,
truncate: bool,
) -> InterpResult<'tcx, bool> {
) -> InterpResult<'tcx, ThreadNameResult> {
let this = self.eval_context_mut();
let thread = thread.to_int(this.libc_ty_layout("pthread_t").size)?;
let thread = ThreadId::try_from(thread).unwrap();
let Ok(thread) = this.thread_id_try_from(thread) else {
return interp_ok(ThreadNameResult::ThreadNotFound);
};
let name_out = name_out.to_pointer(this)?;
let len = len.to_target_usize(this)?;
@ -119,8 +140,9 @@ fn pthread_getname_np(
};
let (success, _written) = this.write_c_str(name, name_out, len)?;
let res = if success { ThreadNameResult::Ok } else { ThreadNameResult::NameTooLong };
interp_ok(success)
interp_ok(res)
}
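The two platform hunks earlier in this diff show how callers consume this result; below is a condensed sketch of just that mapping, with plain values standing in for `eval_libc` and typical errno numbers assumed for illustration only.

#[derive(Debug, PartialEq)]
enum ThreadNameResult {
    Ok,
    NameTooLong,
    ThreadNotFound,
}

// Assumed errno values for the sketch only; the real shims look them up via libc.
const ERANGE: i32 = 34;
const ESRCH: i32 = 3;

fn getname_retval(res: ThreadNameResult, truncates: bool) -> i32 {
    match res {
        ThreadNameResult::Ok => 0,
        // macOS passes truncate=true and reports success even for NameTooLong;
        // illumos passes truncate=false and reports ERANGE instead.
        ThreadNameResult::NameTooLong => if truncates { 0 } else { ERANGE },
        ThreadNameResult::ThreadNotFound => ESRCH,
    }
}

fn main() {
    assert_eq!(getname_retval(ThreadNameResult::NameTooLong, true), 0);
    assert_eq!(getname_retval(ThreadNameResult::NameTooLong, false), ERANGE);
    assert_eq!(getname_retval(ThreadNameResult::ThreadNotFound, true), ESRCH);
}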
fn sched_yield(&mut self) -> InterpResult<'tcx, ()> {

View File

@ -10,6 +10,10 @@
use crate::shims::windows::*;
use crate::*;
// The NTSTATUS STATUS_INVALID_HANDLE (0xC0000008) encoded as a HRESULT by setting the N bit.
// (https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-erref/0642cb2f-2075-4469-918c-4441e69c548a)
const STATUS_INVALID_HANDLE: u32 = 0xD0000008;
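As a quick sanity check of that encoding (a standalone sketch, not Miri code; `FACILITY_NT_BIT` is the usual winerror.h name for bit 28):

// Converting an NTSTATUS to an HRESULT sets the N bit (bit 28),
// so 0xC0000008 becomes 0xD0000008.
const NTSTATUS_INVALID_HANDLE: u32 = 0xC000_0008;
const FACILITY_NT_BIT: u32 = 0x1000_0000; // the "N" bit

fn main() {
    assert_eq!(NTSTATUS_INVALID_HANDLE | FACILITY_NT_BIT, 0xD000_0008);
}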
pub fn is_dyn_sym(name: &str) -> bool {
// std does dynamic detection for these symbols
matches!(
@ -25,7 +29,7 @@ fn win_absolute<'tcx>(path: &Path) -> InterpResult<'tcx, io::Result<PathBuf>> {
}
#[cfg(unix)]
#[allow(clippy::get_first, clippy::arithmetic_side_effects)]
#[expect(clippy::get_first, clippy::arithmetic_side_effects)]
fn win_absolute<'tcx>(path: &Path) -> InterpResult<'tcx, io::Result<PathBuf>> {
// We are on Unix, so we need to implement parts of the logic ourselves.
let bytes = path.as_os_str().as_encoded_bytes();
@ -484,14 +488,14 @@ fn emulate_foreign_item_inner(
let thread_id =
this.CreateThread(security, stacksize, start, arg, flags, thread)?;
this.write_scalar(Handle::Thread(thread_id).to_scalar(this), dest)?;
this.write_scalar(Handle::Thread(thread_id.to_u32()).to_scalar(this), dest)?;
}
"WaitForSingleObject" => {
let [handle, timeout] =
this.check_shim(abi, ExternAbi::System { unwind: false }, link_name, args)?;
let ret = this.WaitForSingleObject(handle, timeout)?;
this.write_scalar(Scalar::from_u32(ret), dest)?;
this.write_scalar(ret, dest)?;
}
"GetCurrentThread" => {
let [] =
@ -510,15 +514,20 @@ fn emulate_foreign_item_inner(
let name = this.read_wide_str(this.read_pointer(name)?)?;
let thread = match Handle::from_scalar(handle, this)? {
Some(Handle::Thread(thread)) => thread,
Some(Handle::Pseudo(PseudoHandle::CurrentThread)) => this.active_thread(),
Some(Handle::Thread(thread)) => this.thread_id_try_from(thread),
Some(Handle::Pseudo(PseudoHandle::CurrentThread)) => Ok(this.active_thread()),
_ => this.invalid_handle("SetThreadDescription")?,
};
let res = match thread {
Ok(thread) => {
// FIXME: use non-lossy conversion
this.set_thread_name(thread, String::from_utf16_lossy(&name).into_bytes());
Scalar::from_u32(0)
}
Err(_) => Scalar::from_u32(STATUS_INVALID_HANDLE),
};
// FIXME: use non-lossy conversion
this.set_thread_name(thread, String::from_utf16_lossy(&name).into_bytes());
this.write_null(dest)?;
this.write_scalar(res, dest)?;
}
"GetThreadDescription" => {
let [handle, name_ptr] =
@ -528,20 +537,25 @@ fn emulate_foreign_item_inner(
let name_ptr = this.deref_pointer(name_ptr)?; // the pointer where we should store the ptr to the name
let thread = match Handle::from_scalar(handle, this)? {
Some(Handle::Thread(thread)) => thread,
Some(Handle::Pseudo(PseudoHandle::CurrentThread)) => this.active_thread(),
_ => this.invalid_handle("SetThreadDescription")?,
Some(Handle::Thread(thread)) => this.thread_id_try_from(thread),
Some(Handle::Pseudo(PseudoHandle::CurrentThread)) => Ok(this.active_thread()),
_ => this.invalid_handle("GetThreadDescription")?,
};
let (name, res) = match thread {
Ok(thread) => {
// Looks like the default thread name is empty.
let name = this.get_thread_name(thread).unwrap_or(b"").to_owned();
let name = this.alloc_os_str_as_wide_str(
bytes_to_os_str(&name)?,
MiriMemoryKind::WinLocal.into(),
)?;
(Scalar::from_maybe_pointer(name, this), Scalar::from_u32(0))
}
Err(_) => (Scalar::null_ptr(this), Scalar::from_u32(STATUS_INVALID_HANDLE)),
};
// Looks like the default thread name is empty.
let name = this.get_thread_name(thread).unwrap_or(b"").to_owned();
let name = this.alloc_os_str_as_wide_str(
bytes_to_os_str(&name)?,
MiriMemoryKind::WinLocal.into(),
)?;
this.write_scalar(Scalar::from_maybe_pointer(name, this), &name_ptr)?;
this.write_null(dest)?;
this.write_scalar(name, &name_ptr)?;
this.write_scalar(res, dest)?;
}
// Miscellaneous
@ -630,9 +644,9 @@ fn emulate_foreign_item_inner(
let [handle] =
this.check_shim(abi, ExternAbi::System { unwind: false }, link_name, args)?;
this.CloseHandle(handle)?;
let ret = this.CloseHandle(handle)?;
this.write_int(1, dest)?;
this.write_scalar(ret, dest)?;
}
"GetModuleFileNameW" => {
let [handle, filename, size] =

View File

@ -14,7 +14,7 @@ pub enum PseudoHandle {
pub enum Handle {
Null,
Pseudo(PseudoHandle),
Thread(ThreadId),
Thread(u32),
}
impl PseudoHandle {
@ -51,7 +51,7 @@ fn data(self) -> u32 {
match self {
Self::Null => 0,
Self::Pseudo(pseudo_handle) => pseudo_handle.value(),
Self::Thread(thread) => thread.to_u32(),
Self::Thread(thread) => thread,
}
}
@ -63,7 +63,7 @@ fn packed_disc_size() -> u32 {
let floor_log2 = variant_count.ilog2();
// we need to add one for non powers of two to compensate for the difference
#[allow(clippy::arithmetic_side_effects)] // cannot overflow
#[expect(clippy::arithmetic_side_effects)] // cannot overflow
if variant_count.is_power_of_two() { floor_log2 } else { floor_log2 + 1 }
}
@ -88,15 +88,14 @@ fn to_packed(self) -> u32 {
// packs the data into the lower `data_size` bits
// and packs the discriminant right above the data
#[allow(clippy::arithmetic_side_effects)] // cannot overflow
return discriminant << data_size | data;
discriminant << data_size | data
}
fn new(discriminant: u32, data: u32) -> Option<Self> {
match discriminant {
Self::NULL_DISCRIMINANT if data == 0 => Some(Self::Null),
Self::PSEUDO_DISCRIMINANT => Some(Self::Pseudo(PseudoHandle::from_value(data)?)),
Self::THREAD_DISCRIMINANT => Some(Self::Thread(data.into())),
Self::THREAD_DISCRIMINANT => Some(Self::Thread(data)),
_ => None,
}
}
@ -107,11 +106,10 @@ fn from_packed(handle: u32) -> Option<Self> {
let data_size = u32::BITS.strict_sub(disc_size);
// the lower `data_size` bits of this mask are 1
#[allow(clippy::arithmetic_side_effects)] // cannot overflow
#[expect(clippy::arithmetic_side_effects)] // cannot overflow
let data_mask = 2u32.pow(data_size) - 1;
// the discriminant is stored right above the lower `data_size` bits
#[allow(clippy::arithmetic_side_effects)] // cannot overflow
let discriminant = handle >> data_size;
// the data is stored in the lower `data_size` bits
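A standalone sketch of the pack/unpack arithmetic above; the 2-bit discriminant width here is made up for illustration, unlike the real value computed from the variant count:

// Pack: data in the lower `DATA_SIZE` bits, discriminant right above it.
const DISC_SIZE: u32 = 2; // hypothetical width, just for the example
const DATA_SIZE: u32 = u32::BITS - DISC_SIZE; // 30

fn pack(discriminant: u32, data: u32) -> u32 {
    assert!(discriminant < (1 << DISC_SIZE));
    assert!(data < (1 << DATA_SIZE));
    discriminant << DATA_SIZE | data
}

fn unpack(handle: u32) -> (u32, u32) {
    // the lower `DATA_SIZE` bits of this mask are 1
    let data_mask = 2u32.pow(DATA_SIZE) - 1;
    (handle >> DATA_SIZE, handle & data_mask)
}

fn main() {
    let packed = pack(0b10, 0x1234);
    assert_eq!(packed, 0x8000_1234);
    assert_eq!(unpack(packed), (0b10, 0x1234));
}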
@ -123,7 +121,7 @@ fn from_packed(handle: u32) -> Option<Self> {
pub fn to_scalar(self, cx: &impl HasDataLayout) -> Scalar {
// 64-bit handles are sign extended 32-bit handles
// see https://docs.microsoft.com/en-us/windows/win32/winprog64/interprocess-communication
#[allow(clippy::cast_possible_wrap)] // we want it to wrap
#[expect(clippy::cast_possible_wrap)] // we want it to wrap
let signed_handle = self.to_packed() as i32;
Scalar::from_target_isize(signed_handle.into(), cx)
}
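The sign-extension round trip described in the comment, as a minimal standalone sketch (independent of Miri's `Scalar` machinery; `widen`/`narrow` are made-up names):

// A 64-bit handle is the 32-bit handle reinterpreted as i32 and sign-extended;
// going back narrows to i32 and reinterprets as u32.
fn widen(packed: u32) -> i64 {
    i64::from(packed as i32)
}

fn narrow(handle: i64) -> Option<u32> {
    i32::try_from(handle).ok().map(|h| h as u32)
}

fn main() {
    let packed = 0x8000_1234u32; // high bit set, so the i32 reinterpretation is negative
    let wide = widen(packed);
    assert!(wide < 0); // sign-extended
    assert_eq!(narrow(wide), Some(packed));
}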
@ -134,7 +132,7 @@ pub fn from_scalar<'tcx>(
) -> InterpResult<'tcx, Option<Self>> {
let sign_extended_handle = handle.to_target_isize(cx)?;
#[allow(clippy::cast_sign_loss)] // we want to lose the sign
#[expect(clippy::cast_sign_loss)] // we want to lose the sign
let handle = if let Ok(signed_handle) = i32::try_from(sign_extended_handle) {
signed_handle as u32
} else {
@ -156,17 +154,22 @@ fn invalid_handle(&mut self, function_name: &str) -> InterpResult<'tcx, !> {
)))
}
fn CloseHandle(&mut self, handle_op: &OpTy<'tcx>) -> InterpResult<'tcx> {
fn CloseHandle(&mut self, handle_op: &OpTy<'tcx>) -> InterpResult<'tcx, Scalar> {
let this = self.eval_context_mut();
let handle = this.read_scalar(handle_op)?;
match Handle::from_scalar(handle, this)? {
Some(Handle::Thread(thread)) =>
this.detach_thread(thread, /*allow_terminated_joined*/ true)?,
let ret = match Handle::from_scalar(handle, this)? {
Some(Handle::Thread(thread)) => {
if let Ok(thread) = this.thread_id_try_from(thread) {
this.detach_thread(thread, /*allow_terminated_joined*/ true)?;
this.eval_windows("c", "TRUE")
} else {
this.invalid_handle("CloseHandle")?
}
}
_ => this.invalid_handle("CloseHandle")?,
}
};
interp_ok(())
interp_ok(ret)
}
}

View File

@ -3,6 +3,7 @@
use rustc_abi::Size;
use crate::concurrency::init_once::InitOnceStatus;
use crate::concurrency::sync::FutexRef;
use crate::*;
#[derive(Copy, Clone)]
@ -10,6 +11,10 @@ struct WindowsInitOnce {
id: InitOnceId,
}
struct WindowsFutex {
futex: FutexRef,
}
impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Windows sync primitives are pointer sized.
@ -168,8 +173,6 @@ fn WaitOnAddress(
let size = this.read_target_usize(size_op)?;
let timeout_ms = this.read_scalar(timeout_op)?.to_u32()?;
let addr = ptr.addr().bytes();
if size > 8 || !size.is_power_of_two() {
let invalid_param = this.eval_windows("c", "ERROR_INVALID_PARAMETER");
this.set_last_error(invalid_param)?;
@ -190,19 +193,27 @@ fn WaitOnAddress(
let layout = this.machine.layouts.uint(size).unwrap();
let futex_val =
this.read_scalar_atomic(&this.ptr_to_mplace(ptr, layout), AtomicReadOrd::Relaxed)?;
this.read_scalar_atomic(&this.ptr_to_mplace(ptr, layout), AtomicReadOrd::Acquire)?;
let compare_val = this.read_scalar(&this.ptr_to_mplace(compare, layout))?;
if futex_val == compare_val {
// If the values are the same, we have to block.
// This cannot fail since we already did an atomic acquire read on that pointer.
let futex_ref = this
.get_sync_or_init(ptr, |_| WindowsFutex { futex: Default::default() })
.unwrap()
.futex
.clone();
this.futex_wait(
addr,
futex_ref,
u32::MAX, // bitset
timeout,
Scalar::from_i32(1), // retval_succ
Scalar::from_i32(0), // retval_timeout
dest.clone(),
this.eval_windows("c", "ERROR_TIMEOUT"), // errno_timeout
IoError::WindowsError("ERROR_TIMEOUT"), // errno_timeout
);
}
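A toy model of the compare-then-block rule implemented above, using plain std atomics instead of the interpreter's machinery (`would_block` is a made-up helper):

use std::sync::atomic::{AtomicU32, Ordering};

// WaitOnAddress only blocks if the location still holds the value the caller
// last observed (checked with an acquire read), so a wake that already updated
// the value cannot be lost between the caller's own check and the wait.
fn would_block(addr: &AtomicU32, compare: u32) -> bool {
    addr.load(Ordering::Acquire) == compare
}

fn main() {
    let flag = AtomicU32::new(0);
    assert!(would_block(&flag, 0)); // value unchanged: the waiter goes to sleep
    flag.store(1, Ordering::Release); // the waker updates the value before waking
    assert!(!would_block(&flag, 0)); // a late waiter now returns immediately
}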
@ -219,8 +230,15 @@ fn WakeByAddressSingle(&mut self, ptr_op: &OpTy<'tcx>) -> InterpResult<'tcx> {
// See the Linux futex implementation for why this fence exists.
this.atomic_fence(AtomicFenceOrd::SeqCst)?;
let addr = ptr.addr().bytes();
this.futex_wake(addr, u32::MAX)?;
let Some(futex_ref) =
this.get_sync_or_init(ptr, |_| WindowsFutex { futex: Default::default() })
else {
// Seems like this cannot return an error, so we just wake nobody.
return interp_ok(());
};
let futex_ref = futex_ref.futex.clone();
this.futex_wake(&futex_ref, u32::MAX)?;
interp_ok(())
}
@ -232,8 +250,15 @@ fn WakeByAddressAll(&mut self, ptr_op: &OpTy<'tcx>) -> InterpResult<'tcx> {
// See the Linux futex implementation for why this fence exists.
this.atomic_fence(AtomicFenceOrd::SeqCst)?;
let addr = ptr.addr().bytes();
while this.futex_wake(addr, u32::MAX)? {}
let Some(futex_ref) =
this.get_sync_or_init(ptr, |_| WindowsFutex { futex: Default::default() })
else {
// Seems like this cannot return an error, so we just wake nobody.
return interp_ok(());
};
let futex_ref = futex_ref.futex.clone();
while this.futex_wake(&futex_ref, u32::MAX)? {}
interp_ok(())
}

View File

@ -59,14 +59,18 @@ fn WaitForSingleObject(
&mut self,
handle_op: &OpTy<'tcx>,
timeout_op: &OpTy<'tcx>,
) -> InterpResult<'tcx, u32> {
) -> InterpResult<'tcx, Scalar> {
let this = self.eval_context_mut();
let handle = this.read_scalar(handle_op)?;
let timeout = this.read_scalar(timeout_op)?.to_u32()?;
let thread = match Handle::from_scalar(handle, this)? {
Some(Handle::Thread(thread)) => thread,
Some(Handle::Thread(thread)) =>
match this.thread_id_try_from(thread) {
Ok(thread) => thread,
Err(_) => this.invalid_handle("WaitForSingleObject")?,
},
// Unlike on POSIX, the outcome of joining the current thread is not documented.
// On current Windows, it just deadlocks.
Some(Handle::Pseudo(PseudoHandle::CurrentThread)) => this.active_thread(),
@ -79,6 +83,6 @@ fn WaitForSingleObject(
this.join_thread(thread)?;
interp_ok(0)
interp_ok(this.eval_windows("c", "WAIT_OBJECT_0"))
}
}

View File

@ -136,7 +136,7 @@ fn affine_transform<'tcx>(
// This is evaluated at compile time. Trait-based conversion is not available.
/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for the
/// definition of `gf_inv` which was used for the creation of this table.
#[allow(clippy::cast_possible_truncation)]
#[expect(clippy::cast_possible_truncation)]
static TABLE: [u8; 256] = {
let mut array = [0; 256];
@ -163,7 +163,7 @@ fn affine_transform<'tcx>(
/// polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.
/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for details.
// This is a const function. Trait-based conversion is not available.
#[allow(clippy::cast_possible_truncation)]
#[expect(clippy::cast_possible_truncation)]
const fn gf2p8_mul(left: u8, right: u8) -> u8 {
// This implementation is based on the `gf2p8mul_byte` definition found inside the Intel intrinsics guide.
// See https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8mul
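For reference, a self-contained shift-and-xor multiply over GF(2^8) with that reduction polynomial (0x11B); this is only an illustrative sketch under those assumptions, not the function body elided from the hunk above:

// Shift-and-xor multiplication in GF(2^8): add (xor) the running `a` whenever
// the low bit of `b` is set, then double `a`, reducing by 0x1B when it would
// overflow 8 bits (x^8 === x^4 + x^3 + x + 1 modulo the reduction polynomial).
const fn gf2p8_mul_ref(mut a: u8, mut b: u8) -> u8 {
    let mut res = 0u8;
    let mut i = 0;
    while i < 8 {
        if (b & 1) != 0 {
            res ^= a;
        }
        let carry = (a & 0x80) != 0;
        a <<= 1;
        if carry {
            a ^= 0x1B;
        }
        b >>= 1;
        i += 1;
    }
    res
}

fn main() {
    assert_eq!(gf2p8_mul_ref(1, 0x53), 0x53); // 1 is the multiplicative identity
    assert_eq!(gf2p8_mul_ref(0x53, 0xCA), 0x01); // the classic AES inverse pair
}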

View File

@ -95,11 +95,22 @@ fn emulate_x86_intrinsic(
}
}
"pclmulqdq" => {
"pclmulqdq" | "pclmulqdq.256" | "pclmulqdq.512" => {
let mut len = 2; // in units of 64bits
this.expect_target_feature_for_intrinsic(link_name, "pclmulqdq")?;
if unprefixed_name.ends_with(".256") {
this.expect_target_feature_for_intrinsic(link_name, "vpclmulqdq")?;
len = 4;
} else if unprefixed_name.ends_with(".512") {
this.expect_target_feature_for_intrinsic(link_name, "vpclmulqdq")?;
this.expect_target_feature_for_intrinsic(link_name, "avx512f")?;
len = 8;
}
let [left, right, imm] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
pclmulqdq(this, left, right, imm, dest)?;
pclmulqdq(this, left, right, imm, dest, len)?;
}
name if name.starts_with("bmi.") => {
@ -386,7 +397,6 @@ enum FloatUnaryOp {
}
/// Performs `which` scalar operation on `op` and returns the result.
#[allow(clippy::arithmetic_side_effects)] // floating point operations without side effects
fn unary_op_f32<'tcx>(
this: &mut crate::MiriInterpCx<'tcx>,
which: FloatUnaryOp,
@ -415,7 +425,7 @@ fn unary_op_f32<'tcx>(
}
/// Disturbs a floating-point result by a relative error on the order of (-2^scale, 2^scale).
#[allow(clippy::arithmetic_side_effects)] // floating point arithmetic cannot panic
#[expect(clippy::arithmetic_side_effects)] // floating point arithmetic cannot panic
fn apply_random_float_error<F: rustc_apfloat::Float>(
this: &mut crate::MiriInterpCx<'_>,
val: F,
@ -1122,7 +1132,7 @@ fn pmulhrsw<'tcx>(
// The result of this operation can overflow a signed 16-bit integer.
// When `left` and `right` are -0x8000, the result is 0x8000.
#[allow(clippy::cast_possible_truncation)]
#[expect(clippy::cast_possible_truncation)]
let res = res as i16;
this.write_scalar(Scalar::from_i16(res), &dest)?;
@ -1134,9 +1144,12 @@ fn pmulhrsw<'tcx>(
/// Perform a carry-less multiplication of two 64-bit integers, selected from `left` and `right` according to `imm8`,
/// and store the results in `dst`.
///
/// `left` and `right` are both vectors of type 2 x i64. Only bits 0 and 4 of `imm8` matter;
/// `left` and `right` are both vectors of type `len` x i64. Only bits 0 and 4 of `imm8` matter;
/// they select the element of `left` and `right`, respectively.
///
/// `len` is the SIMD vector length (in counts of `i64` values). It is expected to be one of
/// `2`, `4`, or `8`.
///
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128>
fn pclmulqdq<'tcx>(
this: &mut MiriInterpCx<'tcx>,
@ -1144,52 +1157,56 @@ fn pclmulqdq<'tcx>(
right: &OpTy<'tcx>,
imm8: &OpTy<'tcx>,
dest: &MPlaceTy<'tcx>,
len: u64,
) -> InterpResult<'tcx, ()> {
assert_eq!(left.layout, right.layout);
assert_eq!(left.layout.size, dest.layout.size);
assert!([2u64, 4, 8].contains(&len));
// Transmute to `[u64; 2]`
// Transmute the input into arrays of `[u64; len]`.
// Transmute the output into an array of `[u128; len / 2]`.
let array_layout = this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u64, 2))?;
let left = left.transmute(array_layout, this)?;
let right = right.transmute(array_layout, this)?;
let dest = dest.transmute(array_layout, this)?;
let src_layout = this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u64, len))?;
let dest_layout = this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, len / 2))?;
let left = left.transmute(src_layout, this)?;
let right = right.transmute(src_layout, this)?;
let dest = dest.transmute(dest_layout, this)?;
let imm8 = this.read_scalar(imm8)?.to_u8()?;
// select the 64-bit integer from left that the user specified (low or high)
let index = if (imm8 & 0x01) == 0 { 0 } else { 1 };
let left = this.read_scalar(&this.project_index(&left, index)?)?.to_u64()?;
for i in 0..(len / 2) {
let lo = i.strict_mul(2);
let hi = i.strict_mul(2).strict_add(1);
// select the 64-bit integer from right that the user specified (low or high)
let index = if (imm8 & 0x10) == 0 { 0 } else { 1 };
let right = this.read_scalar(&this.project_index(&right, index)?)?.to_u64()?;
// select the 64-bit integer from left that the user specified (low or high)
let index = if (imm8 & 0x01) == 0 { lo } else { hi };
let left = this.read_scalar(&this.project_index(&left, index)?)?.to_u64()?;
// Perform carry-less multiplication
//
// This operation is like long multiplication, but ignores all carries.
// That idea corresponds to the xor operator, which is used in the implementation.
//
// Wikipedia has an example https://en.wikipedia.org/wiki/Carry-less_product#Example
let mut result: u128 = 0;
// select the 64-bit integer from right that the user specified (low or high)
let index = if (imm8 & 0x10) == 0 { lo } else { hi };
let right = this.read_scalar(&this.project_index(&right, index)?)?.to_u64()?;
for i in 0..64 {
// if the i-th bit in right is set
if (right & (1 << i)) != 0 {
// xor result with `left` shifted to the left by i positions
result ^= u128::from(left) << i;
// Perform carry-less multiplication.
//
// This operation is like long multiplication, but ignores all carries.
// That idea corresponds to the xor operator, which is used in the implementation.
//
// Wikipedia has an example https://en.wikipedia.org/wiki/Carry-less_product#Example
let mut result: u128 = 0;
for i in 0..64 {
// if the i-th bit in right is set
if (right & (1 << i)) != 0 {
// xor result with `left` shifted to the left by i positions
result ^= u128::from(left) << i;
}
}
let dest = this.project_index(&dest, i)?;
this.write_scalar(Scalar::from_u128(result), &dest)?;
}
let result_low = (result & 0xFFFF_FFFF_FFFF_FFFF) as u64;
let result_high = (result >> 64) as u64;
let dest_low = this.project_index(&dest, 0)?;
this.write_scalar(Scalar::from_u64(result_low), &dest_low)?;
let dest_high = this.project_index(&dest, 1)?;
this.write_scalar(Scalar::from_u64(result_high), &dest_high)?;
interp_ok(())
}
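The inner loop above is plain shift-and-xor; as a standalone illustration of why xor stands in for addition in a carry-less product (`clmul_u64` is just a sketch, not Miri's helper):

// Ordinary long multiplication, except partial products are combined with xor,
// so no carries ever propagate.
fn clmul_u64(left: u64, right: u64) -> u128 {
    let mut result: u128 = 0;
    for i in 0..64 {
        if ((right >> i) & 1) != 0 {
            result ^= u128::from(left) << i;
        }
    }
    result
}

fn main() {
    // 0b11 "times" 0b11: the partial products 0b11 and 0b110 overlap in one bit,
    // which cancels under xor, giving 0b101 rather than 9.
    assert_eq!(clmul_u64(0b11, 0b11), 0b101);
    // When no partial products overlap, the result matches ordinary multiplication.
    assert_eq!(clmul_u64(0b1100, 0b1010), 12 * 10);
}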

View File

@ -68,7 +68,7 @@
/// The mask may be negated if negation flags inside the immediate byte are set.
///
/// For more information, see the Intel Software Developer's Manual, Vol. 2b, Chapter 4.1.
#[allow(clippy::arithmetic_side_effects)]
#[expect(clippy::arithmetic_side_effects)]
fn compare_strings<'tcx>(
this: &mut MiriInterpCx<'tcx>,
str1: &OpTy<'tcx>,
@ -444,7 +444,7 @@ fn emulate_x86_sse42_intrinsic(
let crc = if bit_size == 64 {
// The 64-bit version will only consider the lower 32 bits,
// while the upper 32 bits get discarded.
#[allow(clippy::cast_possible_truncation)]
#[expect(clippy::cast_possible_truncation)]
u128::from((left.to_u64()? as u32).reverse_bits())
} else {
u128::from(left.to_u32()?.reverse_bits())

View File

@ -1,4 +1,4 @@
//@only-target: linux
//@only-target: linux android
//@compile-flags: -Zmiri-disable-isolation
// FIXME(static_mut_refs): Do not allow `static_mut_refs` lint
@ -7,8 +7,8 @@
use std::mem::MaybeUninit;
use std::ptr::{self, addr_of};
use std::sync::atomic::{AtomicI32, Ordering};
use std::thread;
use std::time::{Duration, Instant};
use std::{io, thread};
fn wake_nobody() {
let futex = 0;
@ -40,9 +40,12 @@ fn wake_dangling() {
let ptr: *const i32 = &*futex;
drop(futex);
// Wake 1 waiter. Expect zero waiters woken up, as nobody is waiting.
// Expect error since this is now "unmapped" memory.
// parking_lot relies on this:
// <https://github.com/Amanieu/parking_lot/blob/ca920b31312839013b4455aba1d53a4aede21b2f/core/src/thread_parker/linux.rs#L138-L145>
unsafe {
assert_eq!(libc::syscall(libc::SYS_futex, ptr, libc::FUTEX_WAKE, 1), 0);
assert_eq!(libc::syscall(libc::SYS_futex, ptr, libc::FUTEX_WAKE, 1), -1);
assert_eq!(io::Error::last_os_error().raw_os_error().unwrap(), libc::EFAULT);
}
}

View File

@ -28,26 +28,27 @@ fn test_getrandom() {
let mut buf = [0u8; 5];
unsafe {
#[cfg(target_os = "linux")]
assert_eq!(
libc::syscall(
libc::SYS_getrandom,
ptr::null_mut::<libc::c_void>(),
0 as libc::size_t,
0 as libc::c_uint,
),
0,
);
#[cfg(target_os = "linux")]
assert_eq!(
libc::syscall(
libc::SYS_getrandom,
buf.as_mut_ptr() as *mut libc::c_void,
5 as libc::size_t,
0 as libc::c_uint,
),
5,
);
#[cfg(any(target_os = "linux", target_os = "android"))]
{
assert_eq!(
libc::syscall(
libc::SYS_getrandom,
ptr::null_mut::<libc::c_void>(),
0 as libc::size_t,
0 as libc::c_uint,
),
0,
);
assert_eq!(
libc::syscall(
libc::SYS_getrandom,
buf.as_mut_ptr() as *mut libc::c_void,
5 as libc::size_t,
0 as libc::c_uint,
),
5,
);
}
assert_eq!(
libc::getrandom(ptr::null_mut::<libc::c_void>(), 0 as libc::size_t, 0 as libc::c_uint),

View File

@ -22,6 +22,22 @@ fn main() {
check_condattr();
}
// We want to only use pthread APIs here for easier testing.
// So we can't use `thread::scope`. That means panics can lead
// to a failure to join threads, which can lead to further issues,
// so let's turn such unwinding into aborts.
struct AbortOnDrop;
impl AbortOnDrop {
fn defuse(self) {
mem::forget(self);
}
}
impl Drop for AbortOnDrop {
fn drop(&mut self) {
std::process::abort();
}
}
fn test_mutex_libc_init_recursive() {
unsafe {
let mut attr: libc::pthread_mutexattr_t = mem::zeroed();
@ -122,6 +138,7 @@ fn clone(&self) -> Self {
}
fn check_mutex() {
let bomb = AbortOnDrop;
// Specifically *not* using `Arc` to make sure there is no synchronization apart from the mutex.
unsafe {
let data = SyncUnsafeCell::new((libc::PTHREAD_MUTEX_INITIALIZER, 0));
@ -148,9 +165,11 @@ fn check_mutex() {
assert_eq!(libc::pthread_mutex_trylock(mutexptr), 0);
assert_eq!((*ptr.ptr).1, 3);
}
bomb.defuse();
}
fn check_rwlock_write() {
let bomb = AbortOnDrop;
unsafe {
let data = SyncUnsafeCell::new((libc::PTHREAD_RWLOCK_INITIALIZER, 0));
let ptr = SendPtr { ptr: data.get() };
@ -187,9 +206,11 @@ fn check_rwlock_write() {
assert_eq!(libc::pthread_rwlock_tryrdlock(rwlockptr), 0);
assert_eq!((*ptr.ptr).1, 3);
}
bomb.defuse();
}
fn check_rwlock_read_no_deadlock() {
let bomb = AbortOnDrop;
unsafe {
let l1 = SyncUnsafeCell::new(libc::PTHREAD_RWLOCK_INITIALIZER);
let l1 = SendPtr { ptr: l1.get() };
@ -213,9 +234,11 @@ fn check_rwlock_read_no_deadlock() {
assert_eq!(libc::pthread_rwlock_rdlock(l2.ptr), 0);
handle.join().unwrap();
}
bomb.defuse();
}
fn check_cond() {
let bomb = AbortOnDrop;
unsafe {
let mut cond: MaybeUninit<libc::pthread_cond_t> = MaybeUninit::uninit();
assert_eq!(libc::pthread_cond_init(cond.as_mut_ptr(), ptr::null()), 0);
@ -260,6 +283,7 @@ fn check_cond() {
t.join().unwrap();
}
bomb.defuse();
}
fn check_condattr() {

View File

@ -199,4 +199,29 @@ fn get_thread_name(name: &mut [u8]) -> i32 {
.unwrap()
.join()
.unwrap();
// Now set the name for a non-existing thread and verify error codes.
// (FreeBSD doesn't return an error code.)
#[cfg(not(target_os = "freebsd"))]
{
let invalid_thread = 0xdeadbeef;
let error = {
cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {
libc::ENOENT
} else {
libc::ESRCH
}
}
};
#[cfg(not(target_os = "macos"))]
{
// macOS has no `setname` function accepting a thread id as the first argument.
let res = unsafe { libc::pthread_setname_np(invalid_thread, [0].as_ptr()) };
assert_eq!(res, error);
}
let mut buf = [0; 64];
let res = unsafe { libc::pthread_getname_np(invalid_thread, buf.as_mut_ptr(), buf.len()) };
assert_eq!(res, error);
}
}

View File

@ -181,7 +181,7 @@ unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128
}
// we use unaligned loads with `__m128i` pointers
#[allow(clippy::cast_ptr_alignment)]
#[expect(clippy::cast_ptr_alignment)]
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
#[allow(non_snake_case)]

View File

@ -0,0 +1,193 @@
// We're testing x86 target specific features
//@revisions: avx512 avx
//@only-target: x86_64 i686
//@[avx512]compile-flags: -C target-feature=+vpclmulqdq,+avx512f
//@[avx]compile-flags: -C target-feature=+vpclmulqdq,+avx2
// The constants in the tests below are just bit patterns. They should not
// be interpreted as integers; signedness does not make sense for them, but
// __mXXXi happens to be defined in terms of signed integers.
#![allow(overflowing_literals)]
#![feature(avx512_target_feature)]
#![feature(stdarch_x86_avx512)]
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::mem::transmute;
fn main() {
// Mostly copied from library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs
assert!(is_x86_feature_detected!("pclmulqdq"));
assert!(is_x86_feature_detected!("vpclmulqdq"));
unsafe {
test_mm256_clmulepi64_epi128();
if is_x86_feature_detected!("avx512f") {
test_mm512_clmulepi64_epi128();
}
}
}
macro_rules! verify_kat_pclmul {
($broadcast:ident, $clmul:ident, $assert:ident) => {
// Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf
let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d);
let a = $broadcast(a);
let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d);
let b = $broadcast(b);
let r00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451);
let r00 = $broadcast(r00);
let r01 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315);
let r01 = $broadcast(r01);
let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9);
let r10 = $broadcast(r10);
let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed);
let r11 = $broadcast(r11);
$assert($clmul::<0x00>(a, b), r00);
$assert($clmul::<0x10>(a, b), r01);
$assert($clmul::<0x01>(a, b), r10);
$assert($clmul::<0x11>(a, b), r11);
let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000);
let a0 = $broadcast(a0);
let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000);
let r = $broadcast(r);
$assert($clmul::<0x00>(a0, a0), r);
}
}
// this function tests one of the possible 4 instances
// with different inputs across lanes for the 512-bit version
#[target_feature(enable = "vpclmulqdq,avx512f")]
unsafe fn verify_512_helper(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
) {
let a = _mm512_set_epi64(
0xDCB4DB3657BF0B7D,
0x18DB0601068EDD9F,
0xB76B908233200DC5,
0xE478235FA8E22D5E,
0xAB05CFFA2621154C,
0x1171B47A186174C9,
0x8C6B6C0E7595CEC9,
0xBE3E7D4934E961BD,
);
let b = _mm512_set_epi64(
0x672F6F105A94CEA7,
0x8298B8FFCA5F829C,
0xA3927047B3FB61D8,
0x978093862CDE7187,
0xB1927AB22F31D0EC,
0xA9A5DA619BE4D7AF,
0xCA2590F56884FDC6,
0x19BE9F660038BDB5,
);
let a_decomp = transmute::<_, [__m128i; 4]>(a);
let b_decomp = transmute::<_, [__m128i; 4]>(b);
let r = vectorized(a, b);
let e_decomp = [
linear(a_decomp[0], b_decomp[0]),
linear(a_decomp[1], b_decomp[1]),
linear(a_decomp[2], b_decomp[2]),
linear(a_decomp[3], b_decomp[3]),
];
let e = transmute::<_, __m512i>(e_decomp);
assert_eq_m512i(r, e)
}
// this function tests one of the possible 4 instances
// with different inputs across lanes for the 256-bit version
#[target_feature(enable = "vpclmulqdq")]
unsafe fn verify_256_helper(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
) {
let a = _mm256_set_epi64x(
0xDCB4DB3657BF0B7D,
0x18DB0601068EDD9F,
0xB76B908233200DC5,
0xE478235FA8E22D5E,
);
let b = _mm256_set_epi64x(
0x672F6F105A94CEA7,
0x8298B8FFCA5F829C,
0xA3927047B3FB61D8,
0x978093862CDE7187,
);
let a_decomp = transmute::<_, [__m128i; 2]>(a);
let b_decomp = transmute::<_, [__m128i; 2]>(b);
let r = vectorized(a, b);
let e_decomp = [linear(a_decomp[0], b_decomp[0]), linear(a_decomp[1], b_decomp[1])];
let e = transmute::<_, __m256i>(e_decomp);
assert_eq_m256i(r, e)
}
#[target_feature(enable = "vpclmulqdq,avx512f")]
unsafe fn test_mm512_clmulepi64_epi128() {
verify_kat_pclmul!(_mm512_broadcast_i32x4, _mm512_clmulepi64_epi128, assert_eq_m512i);
verify_512_helper(
|a, b| _mm_clmulepi64_si128::<0x00>(a, b),
|a, b| _mm512_clmulepi64_epi128::<0x00>(a, b),
);
verify_512_helper(
|a, b| _mm_clmulepi64_si128::<0x01>(a, b),
|a, b| _mm512_clmulepi64_epi128::<0x01>(a, b),
);
verify_512_helper(
|a, b| _mm_clmulepi64_si128::<0x10>(a, b),
|a, b| _mm512_clmulepi64_epi128::<0x10>(a, b),
);
verify_512_helper(
|a, b| _mm_clmulepi64_si128::<0x11>(a, b),
|a, b| _mm512_clmulepi64_epi128::<0x11>(a, b),
);
}
#[target_feature(enable = "vpclmulqdq")]
unsafe fn test_mm256_clmulepi64_epi128() {
verify_kat_pclmul!(_mm256_broadcastsi128_si256, _mm256_clmulepi64_epi128, assert_eq_m256i);
verify_256_helper(
|a, b| _mm_clmulepi64_si128::<0x00>(a, b),
|a, b| _mm256_clmulepi64_epi128::<0x00>(a, b),
);
verify_256_helper(
|a, b| _mm_clmulepi64_si128::<0x01>(a, b),
|a, b| _mm256_clmulepi64_epi128::<0x01>(a, b),
);
verify_256_helper(
|a, b| _mm_clmulepi64_si128::<0x10>(a, b),
|a, b| _mm256_clmulepi64_epi128::<0x10>(a, b),
);
verify_256_helper(
|a, b| _mm_clmulepi64_si128::<0x11>(a, b),
|a, b| _mm256_clmulepi64_epi128::<0x11>(a, b),
);
}
#[track_caller]
#[target_feature(enable = "avx512f")]
unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
assert_eq!(transmute::<_, [u64; 8]>(a), transmute::<_, [u64; 8]>(b))
}
#[track_caller]
#[target_feature(enable = "avx")]
unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) {
assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b))
}