From d38fddd89908cae6767ad599388421f99f71840b Mon Sep 17 00:00:00 2001 From: xFrednet Date: Sun, 24 Oct 2021 21:06:17 +0200 Subject: [PATCH] Refactor `camel_case` util functions for new `StrIndex` --- clippy_lints/src/enum_variants.rs | 16 ++-- clippy_utils/src/lib.rs | 2 +- clippy_utils/src/str_utils.rs | 144 +++++++++++++++++++----------- 3 files changed, 103 insertions(+), 59 deletions(-) diff --git a/clippy_lints/src/enum_variants.rs b/clippy_lints/src/enum_variants.rs index 8d6f7d6fdf1..19f1781d0b0 100644 --- a/clippy_lints/src/enum_variants.rs +++ b/clippy_lints/src/enum_variants.rs @@ -1,8 +1,8 @@ //! lint on enum variants that are prefixed or suffixed by the same characters -use clippy_utils::str_utils; use clippy_utils::diagnostics::{span_lint, span_lint_and_help}; use clippy_utils::source::is_present_in_source; +use clippy_utils::str_utils; use rustc_hir::{EnumDef, Item, ItemKind}; use rustc_lint::{LateContext, LateLintPass}; use rustc_session::{declare_tool_lint, impl_lint_pass}; @@ -171,14 +171,14 @@ fn check_variant( } } let first = &def.variants[0].ident.name.as_str(); - let mut pre = &first[..str_utils::until(&*first)]; - let mut post = &first[str_utils::from(&*first)..]; + let mut pre = &first[..str_utils::camel_case_until(&*first).byte_index]; + let mut post = &first[str_utils::camel_case_start(&*first).byte_index..]; for var in def.variants { let name = var.ident.name.as_str(); let pre_match = partial_match(pre, &name); pre = &pre[..pre_match]; - let pre_camel = str_utils::until(pre); + let pre_camel = str_utils::camel_case_until(pre).byte_index; pre = &pre[..pre_camel]; while let Some((next, last)) = name[pre.len()..].chars().zip(pre.chars().rev()).next() { if next.is_numeric() { @@ -186,8 +186,8 @@ fn check_variant( } if next.is_lowercase() { let last = pre.len() - last.len_utf8(); - let last_camel = str_utils::until(&pre[..last]); - pre = &pre[..last_camel]; + let last_camel = str_utils::camel_case_until(&pre[..last]); + pre = &pre[..last_camel.byte_index]; } else { break; } @@ -196,8 +196,8 @@ fn check_variant( let post_match = partial_rmatch(post, &name); let post_end = post.len() - post_match; post = &post[post_end..]; - let post_camel = str_utils::from(post); - post = &post[post_camel..]; + let post_camel = str_utils::camel_case_start(post); + post = &post[post_camel.byte_index..]; } let (what, value) = match (pre.is_empty(), post.is_empty()) { (true, true) => return, diff --git a/clippy_utils/src/lib.rs b/clippy_utils/src/lib.rs index 1c5186dba1b..0637ae3e97b 100644 --- a/clippy_utils/src/lib.rs +++ b/clippy_utils/src/lib.rs @@ -37,7 +37,6 @@ pub mod sym_helper; #[allow(clippy::module_name_repetitions)] pub mod ast_utils; pub mod attrs; -pub mod str_utils; pub mod comparisons; pub mod consts; pub mod diagnostics; @@ -50,6 +49,7 @@ pub mod paths; pub mod ptr; pub mod qualify_min_const_fn; pub mod source; +pub mod str_utils; pub mod sugg; pub mod ty; pub mod usage; diff --git a/clippy_utils/src/str_utils.rs b/clippy_utils/src/str_utils.rs index a6636e39137..f3de1250d5d 100644 --- a/clippy_utils/src/str_utils.rs +++ b/clippy_utils/src/str_utils.rs @@ -1,117 +1,161 @@ +/// Dealing with sting indices can be hard, this struct ensures that both the +/// character and byte index are provided for correct indexing. +#[derive(Debug, Default, PartialEq, Eq)] +pub struct StrIndex { + pub char_index: usize, + pub byte_index: usize, +} + +impl StrIndex { + pub fn new(char_index: usize, byte_index: usize) -> Self { + Self { char_index, byte_index } + } +} + /// Returns the index of the character after the first camel-case component of `s`. +/// +/// ``` +/// assert_eq!(camel_case_until("AbcDef"), StrIndex::new(6, 6)); +/// assert_eq!(camel_case_until("ABCD"), StrIndex::new(0, 0)); +/// assert_eq!(camel_case_until("AbcDD"), StrIndex::new(3, 3)); +/// assert_eq!(camel_case_until("Abc\u{f6}\u{f6}DD"), StrIndex::new(5, 7)); +/// ``` #[must_use] -pub fn until(s: &str) -> usize { - let mut iter = s.char_indices(); - if let Some((_, first)) = iter.next() { +pub fn camel_case_until(s: &str) -> StrIndex { + let mut iter = s.char_indices().enumerate(); + if let Some((_char_index, (_, first))) = iter.next() { if !first.is_uppercase() { - return 0; + return StrIndex::new(0, 0); } } else { - return 0; + return StrIndex::new(0, 0); } let mut up = true; - let mut last_i = 0; - for (i, c) in iter { + let mut last_index = StrIndex::new(0, 0); + for (char_index, (byte_index, c)) in iter { if up { if c.is_lowercase() { up = false; } else { - return last_i; + return last_index; } } else if c.is_uppercase() { up = true; - last_i = i; + last_index.byte_index = byte_index; + last_index.char_index = char_index; } else if !c.is_lowercase() { - return i; + return StrIndex::new(char_index, byte_index); } } - if up { last_i } else { s.len() } + + if up { + last_index + } else { + StrIndex::new(s.chars().count(), s.len()) + } } /// Returns index of the last camel-case component of `s`. +/// +/// ``` +/// assert_eq!(camel_case_start("AbcDef"), StrIndex::new(0, 0)); +/// assert_eq!(camel_case_start("abcDef"), StrIndex::new(3, 3)); +/// assert_eq!(camel_case_start("ABCD"), StrIndex::new(4, 4)); +/// assert_eq!(camel_case_start("abcd"), StrIndex::new(4, 4)); +/// assert_eq!(camel_case_start("\u{f6}\u{f6}cd"), StrIndex::new(4, 6)); +/// ``` #[must_use] -pub fn from(s: &str) -> usize { - let mut iter = s.char_indices().rev(); - if let Some((_, first)) = iter.next() { +pub fn camel_case_start(s: &str) -> StrIndex { + let char_count = s.chars().count(); + let range = 0..char_count; + let mut iter = range.rev().zip(s.char_indices().rev()); + if let Some((char_index, (_, first))) = iter.next() { if !first.is_lowercase() { - return s.len(); + return StrIndex::new(char_index, s.len()); } } else { - return s.len(); + return StrIndex::new(char_count, s.len()); } let mut down = true; - let mut last_i = s.len(); - for (i, c) in iter { + let mut last_index = StrIndex::new(char_count, s.len()); + for (char_index, (byte_index, c)) in iter { if down { if c.is_uppercase() { down = false; - last_i = i; + last_index.byte_index = byte_index; + last_index.char_index = char_index; } else if !c.is_lowercase() { - return last_i; + return last_index; } } else if c.is_lowercase() { down = true; } else if c.is_uppercase() { - last_i = i; + last_index.byte_index = byte_index; + last_index.char_index = char_index; } else { - return last_i; + return last_index; } } - last_i + last_index } #[cfg(test)] mod test { - use super::{from, until}; + use super::*; #[test] - fn from_full() { - assert_eq!(from("AbcDef"), 0); - assert_eq!(from("Abc"), 0); - assert_eq!(from("ABcd"), 0); - assert_eq!(from("ABcdEf"), 0); - assert_eq!(from("AabABcd"), 0); + fn camel_case_start_full() { + assert_eq!(camel_case_start("AbcDef"), StrIndex::new(0, 0)); + assert_eq!(camel_case_start("Abc"), StrIndex::new(0, 0)); + assert_eq!(camel_case_start("ABcd"), StrIndex::new(0, 0)); + assert_eq!(camel_case_start("ABcdEf"), StrIndex::new(0, 0)); + assert_eq!(camel_case_start("AabABcd"), StrIndex::new(0, 0)); } #[test] - fn from_partial() { - assert_eq!(from("abcDef"), 3); - assert_eq!(from("aDbc"), 1); - assert_eq!(from("aabABcd"), 3); + fn camel_case_start_partial() { + assert_eq!(camel_case_start("abcDef"), StrIndex::new(3, 3)); + assert_eq!(camel_case_start("aDbc"), StrIndex::new(1, 1)); + assert_eq!(camel_case_start("aabABcd"), StrIndex::new(3, 3)); + assert_eq!(camel_case_start("\u{f6}\u{f6}AabABcd"), StrIndex::new(2, 4)); } #[test] - fn from_not() { - assert_eq!(from("AbcDef_"), 7); - assert_eq!(from("AbcDD"), 5); + fn camel_case_start_not() { + assert_eq!(camel_case_start("AbcDef_"), StrIndex::new(7, 7)); + assert_eq!(camel_case_start("AbcDD"), StrIndex::new(5, 5)); + assert_eq!(camel_case_start("all_small"), StrIndex::new(9, 9)); + assert_eq!(camel_case_start("\u{f6}_all_small"), StrIndex::new(11, 12)); } #[test] - fn from_caps() { - assert_eq!(from("ABCD"), 4); + fn camel_case_start_caps() { + assert_eq!(camel_case_start("ABCD"), StrIndex::new(4, 4)); } #[test] - fn until_full() { - assert_eq!(until("AbcDef"), 6); - assert_eq!(until("Abc"), 3); + fn camel_case_until_full() { + assert_eq!(camel_case_until("AbcDef"), StrIndex::new(6, 6)); + assert_eq!(camel_case_until("Abc"), StrIndex::new(3, 3)); + assert_eq!(camel_case_until("Abc\u{f6}\u{f6}\u{f6}"), StrIndex::new(6, 9)); } #[test] - fn until_not() { - assert_eq!(until("abcDef"), 0); - assert_eq!(until("aDbc"), 0); + fn camel_case_until_not() { + assert_eq!(camel_case_until("abcDef"), StrIndex::new(0, 0)); + assert_eq!(camel_case_until("aDbc"), StrIndex::new(0, 0)); } #[test] - fn until_partial() { - assert_eq!(until("AbcDef_"), 6); - assert_eq!(until("CallTypeC"), 8); - assert_eq!(until("AbcDD"), 3); + fn camel_case_until_partial() { + assert_eq!(camel_case_until("AbcDef_"), StrIndex::new(6, 6)); + assert_eq!(camel_case_until("CallTypeC"), StrIndex::new(8, 8)); + assert_eq!(camel_case_until("AbcDD"), StrIndex::new(3, 3)); + assert_eq!(camel_case_until("Abc\u{f6}\u{f6}DD"), StrIndex::new(5, 7)); } #[test] fn until_caps() { - assert_eq!(until("ABCD"), 0); + assert_eq!(camel_case_until("ABCD"), StrIndex::new(0, 0)); } }