From 3c1e0afa58dbe1287c1e9a9ea704f9b76b6845cc Mon Sep 17 00:00:00 2001 From: Quinn Sinclair Date: Sun, 19 Nov 2023 11:33:01 +0100 Subject: [PATCH] New Lint [`impl_hash_with_borrow_str_and_bytes`] Implements a lint to prevent implementation of Hash, Borrow and Borrow<[u8]> as it breaks Borrow "semantics". According to the book, types that implement Borrow and Borrow must ensure equality of borrow results under Eq,Ord and Hash. > In particular Eq, Ord and Hash must be equivalent for borrowed and owned values: x.borrow() == y.borrow() should give the same result as x == y. In the same way, hash(x) == hash(x as Borrow<[u8]>) != hash(x as Borrow). changelog: newlint [`impl_hash_with_borrow_str_and_bytes`] --- CHANGELOG.md | 1 + clippy_lints/src/declared_lints.rs | 1 + .../impl_hash_with_borrow_str_and_bytes.rs | 106 ++++++++++++++ clippy_lints/src/lib.rs | 2 + .../ui/impl_hash_with_borrow_str_and_bytes.rs | 136 ++++++++++++++++++ ...impl_hash_with_borrow_str_and_bytes.stderr | 41 ++++++ 6 files changed, 287 insertions(+) create mode 100644 clippy_lints/src/impl_hash_with_borrow_str_and_bytes.rs create mode 100644 tests/ui/impl_hash_with_borrow_str_and_bytes.rs create mode 100644 tests/ui/impl_hash_with_borrow_str_and_bytes.stderr diff --git a/CHANGELOG.md b/CHANGELOG.md index e74df808e06..1760d19b385 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5128,6 +5128,7 @@ Released 2018-09-13 [`if_then_some_else_none`]: https://rust-lang.github.io/rust-clippy/master/index.html#if_then_some_else_none [`ifs_same_cond`]: https://rust-lang.github.io/rust-clippy/master/index.html#ifs_same_cond [`ignored_unit_patterns`]: https://rust-lang.github.io/rust-clippy/master/index.html#ignored_unit_patterns +[`impl_hash_borrow_with_str_and_bytes`]: https://rust-lang.github.io/rust-clippy/master/index.html#impl_hash_borrow_with_str_and_bytes [`impl_trait_in_params`]: https://rust-lang.github.io/rust-clippy/master/index.html#impl_trait_in_params [`implicit_clone`]: https://rust-lang.github.io/rust-clippy/master/index.html#implicit_clone [`implicit_hasher`]: https://rust-lang.github.io/rust-clippy/master/index.html#implicit_hasher diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs index 85854a0dfb7..55dedc0a658 100644 --- a/clippy_lints/src/declared_lints.rs +++ b/clippy_lints/src/declared_lints.rs @@ -204,6 +204,7 @@ crate::if_not_else::IF_NOT_ELSE_INFO, crate::if_then_some_else_none::IF_THEN_SOME_ELSE_NONE_INFO, crate::ignored_unit_patterns::IGNORED_UNIT_PATTERNS_INFO, + crate::impl_hash_with_borrow_str_and_bytes::IMPL_HASH_BORROW_WITH_STR_AND_BYTES_INFO, crate::implicit_hasher::IMPLICIT_HASHER_INFO, crate::implicit_return::IMPLICIT_RETURN_INFO, crate::implicit_saturating_add::IMPLICIT_SATURATING_ADD_INFO, diff --git a/clippy_lints/src/impl_hash_with_borrow_str_and_bytes.rs b/clippy_lints/src/impl_hash_with_borrow_str_and_bytes.rs new file mode 100644 index 00000000000..9374582b54b --- /dev/null +++ b/clippy_lints/src/impl_hash_with_borrow_str_and_bytes.rs @@ -0,0 +1,106 @@ +use clippy_utils::diagnostics::span_lint_and_then; +use clippy_utils::ty::implements_trait; +use rustc_hir::def::{DefKind, Res}; +use rustc_hir::{Item, ItemKind, Path, TraitRef}; +use rustc_lint::{LateContext, LateLintPass}; +use rustc_middle::ty::Ty; +use rustc_session::{declare_lint_pass, declare_tool_lint}; +use rustc_span::symbol::sym; + +declare_clippy_lint! { + /// ### What it does + /// + /// This lint is concerned with the semantics of `Borrow` and `Hash` for a + /// type that implements all three of `Hash`, `Borrow` and `Borrow<[u8]>` + /// as it is impossible to satisfy the semantics of Borrow and `Hash` for + /// both `Borrow` and `Borrow<[u8]>`. + /// + /// ### Why is this bad? + /// + /// When providing implementations for `Borrow`, one should consider whether the different + /// implementations should act as facets or representations of the underlying type. Generic code + /// typically uses `Borrow` when it relies on the identical behavior of these additional trait + /// implementations. These traits will likely appear as additional trait bounds. + /// + /// In particular `Eq`, `Ord` and `Hash` must be equivalent for borrowed and owned values: + /// `x.borrow() == y.borrow()` should give the same result as `x == y`. + /// It follows then that the following equivalence must hold: + /// `hash(x) == hash((x as Borrow<[u8]>).borrow()) == hash((x as Borrow).borrow())` + /// + /// Unfortunately it doesn't hold as `hash("abc") != hash("abc".as_bytes())`. + /// This happens because the `Hash` impl for str passes an additional `0xFF` byte to + /// the hasher to avoid collisions. For example, given the tuples `("a", "bc")`, and `("ab", "c")`, + /// the two tuples would have the same hash value if the `0xFF` byte was not added. + /// + /// ### Example + /// + /// ``` + /// use std::borrow::Borrow; + /// use std::hash::{Hash, Hasher}; + /// + /// struct ExampleType { + /// data: String + /// } + /// + /// impl Hash for ExampleType { + /// fn hash(&self, state: &mut H) { + /// self.data.hash(state); + /// } + /// } + /// + /// impl Borrow for ExampleType { + /// fn borrow(&self) -> &str { + /// &self.data + /// } + /// } + /// + /// impl Borrow<[u8]> for ExampleType { + /// fn borrow(&self) -> &[u8] { + /// self.data.as_bytes() + /// } + /// } + /// ``` + /// As a consequence, hashing a `&ExampleType` and hashing the result of the two + /// borrows will result in different values. + /// + #[clippy::version = "1.76.0"] + pub IMPL_HASH_BORROW_WITH_STR_AND_BYTES, + correctness, + "ensures that the semantics of `Borrow` for `Hash` are satisfied when `Borrow` and `Borrow<[u8]>` are implemented" +} + +declare_lint_pass!(ImplHashWithBorrowStrBytes => [IMPL_HASH_BORROW_WITH_STR_AND_BYTES]); + +impl LateLintPass<'_> for ImplHashWithBorrowStrBytes { + /// We are emitting this lint at the Hash impl of a type that implements all + /// three of `Hash`, `Borrow` and `Borrow<[u8]>`. + fn check_item(&mut self, cx: &LateContext<'_>, item: &Item<'_>) { + if let ItemKind::Impl(imp) = item.kind + && let Some(TraitRef {path: Path {span, res, ..}, ..}) = imp.of_trait + && let ty = cx.tcx.type_of(item.owner_id).instantiate_identity() + && let Some(hash_id) = cx.tcx.get_diagnostic_item(sym::Hash) + && Res::Def(DefKind::Trait, hash_id) == *res + && let Some(borrow_id) = cx.tcx.get_diagnostic_item(sym::Borrow) + // since we are in the `Hash` impl, we don't need to check for that. + // we need only to check for `Borrow` and `Borrow<[u8]>` + && implements_trait(cx, ty, borrow_id, &[cx.tcx.types.str_.into()]) + && implements_trait(cx, ty, borrow_id, &[Ty::new_slice(cx.tcx, cx.tcx.types.u8).into()]) + { + span_lint_and_then( + cx, + IMPL_HASH_BORROW_WITH_STR_AND_BYTES, + *span, + "the semantics of `Borrow` around `Hash` can't be satisfied when both `Borrow` and `Borrow<[u8]>` are implemented", + |diag| { + diag.note("the `Borrow` semantics require that `Hash` must behave the same for all implementations of Borrow"); + diag.note( + "however, the hash implementations of a string (`str`) and the bytes of a string `[u8]` do not behave the same ..." + ); + diag.note("... as (`hash(\"abc\") != hash(\"abc\".as_bytes())`"); + diag.help("consider either removing one of the `Borrow` implementations (`Borrow` or `Borrow<[u8]>`) ..."); + diag.help("... or not implementing `Hash` for this type"); + }, + ); + } + } +} diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs index c462c933082..952625c78be 100644 --- a/clippy_lints/src/lib.rs +++ b/clippy_lints/src/lib.rs @@ -144,6 +144,7 @@ mod if_not_else; mod if_then_some_else_none; mod ignored_unit_patterns; +mod impl_hash_with_borrow_str_and_bytes; mod implicit_hasher; mod implicit_return; mod implicit_saturating_add; @@ -1066,6 +1067,7 @@ pub fn register_lints(store: &mut rustc_lint::LintStore, conf: &'static Conf) { store.register_late_pass(move |_| Box::new(manual_hash_one::ManualHashOne::new(msrv()))); store.register_late_pass(|_| Box::new(iter_without_into_iter::IterWithoutIntoIter)); store.register_late_pass(|_| Box::new(iter_over_hash_type::IterOverHashType)); + store.register_late_pass(|_| Box::new(impl_hash_with_borrow_str_and_bytes::ImplHashWithBorrowStrBytes)); // add lints here, do not remove this comment, it's used in `new_lint` } diff --git a/tests/ui/impl_hash_with_borrow_str_and_bytes.rs b/tests/ui/impl_hash_with_borrow_str_and_bytes.rs new file mode 100644 index 00000000000..f6ce6153e01 --- /dev/null +++ b/tests/ui/impl_hash_with_borrow_str_and_bytes.rs @@ -0,0 +1,136 @@ +#![warn(clippy::impl_hash_borrow_with_str_and_bytes)] + +use std::borrow::Borrow; +use std::hash::{Hash, Hasher}; + +struct ExampleType { + data: String, +} + +impl Hash for ExampleType { + //~^ ERROR: can't + fn hash(&self, state: &mut H) { + self.data.hash(state); + } +} + +impl Borrow for ExampleType { + fn borrow(&self) -> &str { + &self.data + } +} + +impl Borrow<[u8]> for ExampleType { + fn borrow(&self) -> &[u8] { + self.data.as_bytes() + } +} + +struct ShouldNotRaiseForHash {} +impl Hash for ShouldNotRaiseForHash { + fn hash(&self, state: &mut H) { + todo!(); + } +} + +struct ShouldNotRaiseForBorrow {} +impl Borrow for ShouldNotRaiseForBorrow { + fn borrow(&self) -> &str { + todo!(); + } +} +impl Borrow<[u8]> for ShouldNotRaiseForBorrow { + fn borrow(&self) -> &[u8] { + todo!(); + } +} + +struct ShouldNotRaiseForHashBorrowStr {} +impl Hash for ShouldNotRaiseForHashBorrowStr { + fn hash(&self, state: &mut H) { + todo!(); + } +} +impl Borrow for ShouldNotRaiseForHashBorrowStr { + fn borrow(&self) -> &str { + todo!(); + } +} + +struct ShouldNotRaiseForHashBorrowSlice {} +impl Hash for ShouldNotRaiseForHashBorrowSlice { + fn hash(&self, state: &mut H) { + todo!(); + } +} + +impl Borrow<[u8]> for ShouldNotRaiseForHashBorrowSlice { + fn borrow(&self) -> &[u8] { + todo!(); + } +} + +#[derive(Hash)] +//~^ ERROR: can't +struct Derived { + data: String, +} + +impl Borrow for Derived { + fn borrow(&self) -> &str { + self.data.as_str() + } +} + +impl Borrow<[u8]> for Derived { + fn borrow(&self) -> &[u8] { + self.data.as_bytes() + } +} + +struct GenericExampleType { + data: T, +} + +impl Hash for GenericExampleType { + fn hash(&self, state: &mut H) { + self.data.hash(state); + } +} + +impl Borrow for GenericExampleType { + fn borrow(&self) -> &str { + &self.data + } +} + +impl Borrow<[u8]> for GenericExampleType<&'static [u8]> { + fn borrow(&self) -> &[u8] { + self.data + } +} + +struct GenericExampleType2 { + data: T, +} + +impl Hash for GenericExampleType2 { + //~^ ERROR: can't + // this is correctly throwing an error for generic with concrete impl + // for all 3 types + fn hash(&self, state: &mut H) { + self.data.hash(state); + } +} + +impl Borrow for GenericExampleType2 { + fn borrow(&self) -> &str { + &self.data + } +} + +impl Borrow<[u8]> for GenericExampleType2 { + fn borrow(&self) -> &[u8] { + self.data.as_bytes() + } +} diff --git a/tests/ui/impl_hash_with_borrow_str_and_bytes.stderr b/tests/ui/impl_hash_with_borrow_str_and_bytes.stderr new file mode 100644 index 00000000000..afc35ef9845 --- /dev/null +++ b/tests/ui/impl_hash_with_borrow_str_and_bytes.stderr @@ -0,0 +1,41 @@ +error: the semantics of `Borrow` around `Hash` can't be satisfied when both `Borrow` and `Borrow<[u8]>` are implemented + --> $DIR/impl_hash_with_borrow_str_and_bytes.rs:10:6 + | +LL | impl Hash for ExampleType { + | ^^^^ + | + = note: the `Borrow` semantics require that `Hash` must behave the same for all implementations of Borrow + = note: however, the hash implementations of a string (`str`) and the bytes of a string `[u8]` do not behave the same ... + = note: ... as (`hash("abc") != hash("abc".as_bytes())` + = help: consider either removing one of the `Borrow` implementations (`Borrow` or `Borrow<[u8]>`) ... + = help: ... or not implementing `Hash` for this type + = note: `-D clippy::impl-hash-borrow-with-str-and-bytes` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::impl_hash_borrow_with_str_and_bytes)]` + +error: the semantics of `Borrow` around `Hash` can't be satisfied when both `Borrow` and `Borrow<[u8]>` are implemented + --> $DIR/impl_hash_with_borrow_str_and_bytes.rs:73:10 + | +LL | #[derive(Hash)] + | ^^^^ + | + = note: the `Borrow` semantics require that `Hash` must behave the same for all implementations of Borrow + = note: however, the hash implementations of a string (`str`) and the bytes of a string `[u8]` do not behave the same ... + = note: ... as (`hash("abc") != hash("abc".as_bytes())` + = help: consider either removing one of the `Borrow` implementations (`Borrow` or `Borrow<[u8]>`) ... + = help: ... or not implementing `Hash` for this type + = note: this error originates in the derive macro `Hash` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: the semantics of `Borrow` around `Hash` can't be satisfied when both `Borrow` and `Borrow<[u8]>` are implemented + --> $DIR/impl_hash_with_borrow_str_and_bytes.rs:117:6 + | +LL | impl Hash for GenericExampleType2 { + | ^^^^ + | + = note: the `Borrow` semantics require that `Hash` must behave the same for all implementations of Borrow + = note: however, the hash implementations of a string (`str`) and the bytes of a string `[u8]` do not behave the same ... + = note: ... as (`hash("abc") != hash("abc".as_bytes())` + = help: consider either removing one of the `Borrow` implementations (`Borrow` or `Borrow<[u8]>`) ... + = help: ... or not implementing `Hash` for this type + +error: aborting due to 3 previous errors +