From 0349f2ae8a7462c72345717671269224f50ec606 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 9 Mar 2014 22:55:43 +0100 Subject: [PATCH 1/3] libstd: Change `slice_shift_char`, `shift_char`, `pop_char`, `shift_byte` and `pop_byte` to return an Option instead of failing --- src/libglob/lib.rs | 11 +++-- src/libstd/str.rs | 87 ++++++++++++++++++++------------- src/test/run-pass/utf8_chars.rs | 8 +-- 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/src/libglob/lib.rs b/src/libglob/lib.rs index 819e0949e3b..a33d041ab20 100644 --- a/src/libglob/lib.rs +++ b/src/libglob/lib.rs @@ -369,11 +369,11 @@ impl Pattern { return EntirePatternDoesntMatch; } - let (c, next) = file.slice_shift_char(); - if require_literal(c) { + let (some_c, next) = file.slice_shift_char(); + if require_literal(some_c.unwrap()) { return SubPatternDoesntMatch; } - prev_char.set(Some(c)); + prev_char.set(some_c); file = next; } } @@ -382,7 +382,8 @@ impl Pattern { return EntirePatternDoesntMatch; } - let (c, next) = file.slice_shift_char(); + let (some_c, next) = file.slice_shift_char(); + let c = some_c.unwrap(); let matches = match *token { AnyChar => { !require_literal(c) @@ -403,7 +404,7 @@ impl Pattern { if !matches { return SubPatternDoesntMatch; } - prev_char.set(Some(c)); + prev_char.set(some_c); file = next; } } diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 3464c4a1128..57b3dc3a252 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -1355,6 +1355,7 @@ pub mod raw { use libc; use ptr; use ptr::RawPtr; + use option::{Option, Some, None}; use str::{is_utf8, OwnedStr, StrSlice}; use vec; use vec::{MutableVector, ImmutableVector, OwnedVector}; @@ -1465,22 +1466,28 @@ pub mod raw { /// Removes the last byte from a string and returns it. /// The caller must preserve the valid UTF-8 property. 
- pub unsafe fn pop_byte(s: &mut ~str) -> u8 { + pub unsafe fn pop_byte(s: &mut ~str) -> Option<u8> { let len = s.len(); - assert!((len > 0u)); - let b = s[len - 1u]; - s.set_len(len - 1); - return b; + if len == 0u { + return None; + } else { + let b = s[len - 1u]; + s.set_len(len - 1); + return Some(b); + } } /// Removes the first byte from a string and returns it. /// The caller must preserve the valid UTF-8 property. - pub unsafe fn shift_byte(s: &mut ~str) -> u8 { + pub unsafe fn shift_byte(s: &mut ~str) -> Option<u8> { let len = s.len(); - assert!((len > 0u)); - let b = s[0]; - *s = s.slice(1, len).to_owned(); - return b; + if len == 0u { + return None; + } else { + let b = s[0]; + *s = s.slice(1, len).to_owned(); + return Some(b); + } } /// Access the str in its vector representation. @@ -2291,7 +2298,7 @@ pub trait StrSlice<'a> { /// assert_eq!(c, 'ö'); /// assert_eq!(s2, "we 老虎 Léopard"); /// ``` - fn slice_shift_char(&self) -> (char, &'a str); + fn slice_shift_char(&self) -> (Option<char>, &'a str); /// Levenshtein Distance between two strings. 
fn lev_distance(&self, t: &str) -> uint; @@ -2744,10 +2751,14 @@ impl<'a> StrSlice<'a> for &'a str { } #[inline] - fn slice_shift_char(&self) -> (char, &'a str) { - let CharRange {ch, next} = self.char_range_at(0u); - let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) }; - return (ch, next_s); + fn slice_shift_char(&self) -> (Option<char>, &'a str) { + if self.is_empty() { + return (None, *self); + } else { + let CharRange {ch, next} = self.char_range_at(0u); + let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) }; + return (Some(ch), next_s); + } } fn lev_distance(&self, t: &str) -> uint { @@ -2815,14 +2826,14 @@ pub trait OwnedStr { /// # Failure /// /// If the string does not contain any characters - fn pop_char(&mut self) -> char; + fn pop_char(&mut self) -> Option<char>; /// Remove the first character from a string and return it /// /// # Failure /// /// If the string does not contain any characters - fn shift_char(&mut self) -> char; + fn shift_char(&mut self) -> Option<char>; /// Prepend a char to a string fn unshift_char(&mut self, ch: char); @@ -2925,19 +2936,26 @@ impl OwnedStr for ~str { } #[inline] - fn pop_char(&mut self) -> char { + fn pop_char(&mut self) -> Option<char> { let end = self.len(); - assert!(end > 0u); - let CharRange {ch, next} = self.char_range_at_reverse(end); - unsafe { self.set_len(next); } - return ch; + if end == 0u { + return None; + } else { + let CharRange {ch, next} = self.char_range_at_reverse(end); + unsafe { self.set_len(next); } + return Some(ch); + } } #[inline] - fn shift_char(&mut self) -> char { - let CharRange {ch, next} = self.char_range_at(0u); - *self = self.slice(next, self.len()).to_owned(); - return ch; + fn shift_char(&mut self) -> Option<char> { + if self.is_empty() { + return None; + } else { + let CharRange {ch, next} = self.char_range_at(0u); + *self = self.slice(next, self.len()).to_owned(); + return Some(ch); + } } #[inline] @@ -3148,7 +3166,7 @@ mod tests { let mut data = ~"ประเทศไทย中华"; let cc = 
data.pop_char(); assert_eq!(~"ประเทศไทย中", data); - assert_eq!('华', cc); + assert_eq!(Some('华'), cc); } #[test] @@ -3156,14 +3174,15 @@ mod tests { let mut data2 = ~"华"; let cc2 = data2.pop_char(); assert_eq!(~"", data2); - assert_eq!('华', cc2); + assert_eq!(Some('华'), cc2); } #[test] - #[should_fail] - fn test_pop_char_fail() { + fn test_pop_char_empty() { let mut data = ~""; - let _cc3 = data.pop_char(); + let cc3 = data.pop_char(); + assert_eq!(~"", data); + assert_eq!(None, cc3); } #[test] @@ -3182,7 +3201,7 @@ mod tests { let mut data = ~"ประเทศไทย中"; let cc = data.shift_char(); assert_eq!(~"ระเทศไทย中", data); - assert_eq!('ป', cc); + assert_eq!(Some('ป'), cc); } #[test] @@ -3611,7 +3630,7 @@ mod tests { let mut s = ~"ABC"; let b = unsafe{raw::shift_byte(&mut s)}; assert_eq!(s, ~"BC"); - assert_eq!(b, 65u8); + assert_eq!(b, Some(65u8)); } #[test] @@ -3619,7 +3638,7 @@ mod tests { let mut s = ~"ABC"; let b = unsafe{raw::pop_byte(&mut s)}; assert_eq!(s, ~"AB"); - assert_eq!(b, 67u8); + assert_eq!(b, Some(67u8)); } #[test] diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index 3066a247e4e..34ee0933d2a 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -42,12 +42,12 @@ pub fn main() { assert!((!str::is_utf8([0xf0_u8, 0xff_u8, 0xff_u8, 0x10_u8]))); let mut stack = ~"a×c€"; - assert_eq!(stack.pop_char(), '€'); - assert_eq!(stack.pop_char(), 'c'); + assert_eq!(stack.pop_char(), Some('€')); + assert_eq!(stack.pop_char(), Some('c')); stack.push_char('u'); assert!(stack == ~"a×u"); - assert_eq!(stack.shift_char(), 'a'); - assert_eq!(stack.shift_char(), '×'); + assert_eq!(stack.shift_char(), Some('a')); + assert_eq!(stack.shift_char(), Some('×')); stack.unshift_char('ß'); assert!(stack == ~"ßu"); } From 262d1543db6eba077e9f31a4e734c337746e7ea8 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 9 Mar 2014 22:56:33 +0100 Subject: [PATCH 2/3] libstd: Add unit tests for `slice_shift_char` --- 
src/libstd/str.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 57b3dc3a252..f5bd7e75c3a 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -3618,6 +3618,18 @@ mod tests { assert!(!" _ ".is_whitespace()); } + #[test] + fn test_slice_shift_char() { + let data = "ประเทศไทย中"; + assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中")); + } + + #[test] + fn test_slice_shift_char_2() { + let empty = ""; + assert_eq!(empty.slice_shift_char(), (None, "")); + } + #[test] fn test_push_byte() { let mut s = ~"ABC"; From b0e855a7584b9893d9fe19d2eb9e7272e0c90a6f Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 9 Mar 2014 22:57:22 +0100 Subject: [PATCH 3/3] libstd: Update docs for `slice_shift_char` and {shift,pop}_{char,byte} --- src/libstd/str.rs | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/src/libstd/str.rs b/src/libstd/str.rs index f5bd7e75c3a..5bd14e717b1 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -1465,6 +1465,7 @@ pub mod raw { } /// Removes the last byte from a string and returns it. + /// Returns None when an empty string is passed. /// The caller must preserve the valid UTF-8 property. pub unsafe fn pop_byte(s: &mut ~str) -> Option<u8> { let len = s.len(); @@ -1478,6 +1479,7 @@ pub mod raw { } /// Removes the first byte from a string and returns it. + /// Returns None when an empty string is passed. /// The caller must preserve the valid UTF-8 property. pub unsafe fn shift_byte(s: &mut ~str) -> Option<u8> { let len = s.len(); @@ -2280,22 +2282,19 @@ pub trait StrSlice<'a> { /// Retrieves the first character from a string slice and returns /// it. This does not allocate a new string; instead, it returns a /// slice that point one character beyond the character that was - /// shifted. - /// - /// # Failure - /// - /// If the string does not contain any characters. + /// shifted. 
If the string does not contain any characters, + /// a tuple of None and an empty string is returned instead. /// /// # Example /// /// ```rust /// let s = "Löwe 老虎 Léopard"; /// let (c, s1) = s.slice_shift_char(); - /// assert_eq!(c, 'L'); + /// assert_eq!(c, Some('L')); /// assert_eq!(s1, "öwe 老虎 Léopard"); /// /// let (c, s2) = s1.slice_shift_char(); - /// assert_eq!(c, 'ö'); + /// assert_eq!(c, Some('ö')); /// assert_eq!(s2, "we 老虎 Léopard"); /// ``` fn slice_shift_char(&self) -> (Option<char>, &'a str); @@ -2821,18 +2820,12 @@ pub trait OwnedStr { /// Appends a character to the back of a string fn push_char(&mut self, c: char); - /// Remove the final character from a string and return it - /// - /// # Failure - /// - /// If the string does not contain any characters + /// Remove the final character from a string and return it. Return None + /// when the string is empty. fn pop_char(&mut self) -> Option<char>; - /// Remove the first character from a string and return it - /// - /// # Failure - /// - /// If the string does not contain any characters + /// Remove the first character from a string and return it. Return None + /// when the string is empty. fn shift_char(&mut self) -> Option<char>; /// Prepend a char to a string